{
  "title": "Ropedia Xperience-10M Task Suite Project Packet",
  "version": "2026-06-01",
  "scope_status": {
    "validated_data": "one public Xperience-10M sample episode",
    "aligned_frames": 5821,
    "sliding_windows": 1161,
    "current_feature_dimensions": 8378,
    "core_task_count": 12,
    "neural_head_count": 12,
    "direction_extension_probe_count": 4,
    "raw_xperience10m_data_in_repo": false,
    "audio_feature_status": "Audio is present in the sample MP4 streams and shown in the figures, but the current baseline feature vector does not include an extracted audio block.",
    "qwen3_omni_32_episode_claim": false,
    "qwen3_omni_status": "Readiness-only until at least 32 valid episodes are available and held-out episode evaluation finishes."
  },
  "review_path": [
    {
      "step": 1,
      "question": "What exactly is being claimed?",
      "primary_artifacts": [
        "PROJECT_STATUS.md",
        "docs/data/project_status.json",
        "EVIDENCE_CONTRACT.md",
        "ARTIFACT_GUIDE.md",
        "EVALUATION_PROTOCOL.md",
        "FIGURE_INDEX.md",
        "SOURCE_ALIGNMENT_AUDIT.md",
        "XPERIENCE10M_DATASET_CARD_ALIGNMENT.md",
        "docs/data/evidence_contract.json",
        "docs/data/artifact_index.json",
        "docs/data/brand_assets.json",
        "docs/data/evaluation_protocol.json",
        "docs/data/figure_index.json",
        "docs/data/source_alignment_audit.json",
        "docs/data/xperience10m_dataset_card_alignment.json",
        "docs/data/mirror_parity.json",
        "docs/data/publication_audit.json",
        "docs/data/scope_claims_audit.json",
        "docs/data/website_integrity.json"
      ],
      "readout": "The project status table gives the compact current-state summary. Single-episode task engineering, metrics, visualizations, publication hygiene, local website integrity, mirror parity, and scope-claim guarding are claimed; cross-episode generalization and 32-episode Qwen3-Omni quality are not claimed."
    },
    {
      "step": 2,
      "question": "What do the official Xperience-10M dataset and sample cards say?",
      "primary_artifacts": [
        "XPERIENCE10M_DATASET_CARD_ALIGNMENT.md",
        "docs/data/xperience10m_dataset_card_alignment.json",
        "https://huggingface.co/datasets/ropedia-ai/xperience-10m",
        "https://huggingface.co/datasets/ropedia-ai/xperience-10m-sample"
      ],
      "readout": "The full upstream dataset is a manually gated large-scale 4D multimodal egocentric source. The public sample card records the sample license, HOMIE Toolkit path, and Rerun 0.29.0 visualization path. This repo validates only one public sample episode and records unsupported areas explicitly."
    },
    {
      "step": 3,
      "question": "Are source facts consistently presented?",
      "primary_artifacts": [
        "SOURCE_ALIGNMENT_AUDIT.md",
        "docs/data/source_alignment_audit.json",
        "scripts/validate_source_alignment.py"
      ],
      "readout": "The source-alignment report checks full-dataset metadata, API-listing caveats, public sample license/tooling, and boundary markers across repo docs, website, and HF cards."
    },
    {
      "step": 4,
      "question": "How exactly are the tasks evaluated?",
      "primary_artifacts": [
        "EVALUATION_PROTOCOL.md",
        "docs/data/evaluation_protocol.json",
        "scripts/build_evaluation_protocol.py"
      ],
      "readout": "The protocol fixes the 20-frame window unit, chronological split, train-only normalization, leakage controls, per-task input/target/metric contracts, and unsupported interpretations."
    },
    {
      "step": 5,
      "question": "How can the public pipeline be reproduced?",
      "primary_artifacts": [
        "REPRODUCIBILITY.md",
        "docs/data/reproducibility_matrix.json",
        "notes/reproducibility_audit.md"
      ],
      "readout": "The public sample pipeline has explicit commands, expected outputs, and a prior exact-match reproduction check over the committed metrics."
    },
    {
      "step": 6,
      "question": "What is inside one model input?",
      "primary_artifacts": [
        "results/episode_task_suite/windows.csv",
        "results/episode_task_suite/feature_manifest.json",
        "results/episode_task_suite/available_modalities.json",
        "docs/data/modality_atlas.json"
      ],
      "readout": "The current model input is an 8,378-dimensional aligned window vector with explicit feature-block boundaries, and the readable atlas shows each public-sample modality without raw data redistribution."
    },
    {
      "step": 7,
      "question": "Do the task metrics have committed evidence?",
      "primary_artifacts": [
        "results/episode_task_suite/summary_report.json",
        "results/episode_task_suite/neural_mlp/",
        "docs/data/summary_metrics.json"
      ],
      "readout": "Each of the 12 tasks has minimal-head metrics and a matching neural MLP result over the same window contracts."
    },
    {
      "step": 8,
      "question": "How should this scale beyond one episode?",
      "primary_artifacts": [
        "results/omni_finetune/DATA_BLOCKER_REPORT.md",
        "results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md",
        "scripts/omni/discover_xperience10m_sources.py"
      ],
      "readout": "The next milestone is a 32-episode held-out-episode Qwen3-Omni LoRA pilot after gated Xperience-10M access is available."
    }
  ],
  "project_status": "PROJECT_STATUS.md",
  "project_status_json": "docs/data/project_status.json",
  "evaluation_protocol": "EVALUATION_PROTOCOL.md",
  "evaluation_protocol_json": "docs/data/evaluation_protocol.json",
  "source_alignment_audit": "SOURCE_ALIGNMENT_AUDIT.md",
  "source_alignment_audit_json": "docs/data/source_alignment_audit.json",
  "artifact_guide": "ARTIFACT_GUIDE.md",
  "artifact_index": "docs/data/artifact_index.json",
  "brand_assets": "docs/data/brand_assets.json",
  "figure_index": "FIGURE_INDEX.md",
  "figure_index_json": "docs/data/figure_index.json",
  "reproducibility_matrix": "docs/data/reproducibility_matrix.json",
  "public_surfaces": {
    "github_repo": "https://github.com/ChaoYue0307/ropedia-xperience-10m-task-suite",
    "github_pages": "https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/",
    "hf_space": "https://huggingface.co/spaces/cy0307/ropedia-xperience-10m-task-suite",
    "hf_static_app": "https://cy0307-ropedia-xperience-10m-task-suite.static.hf.space/",
    "hf_artifacts": "https://huggingface.co/datasets/cy0307/ropedia-xperience-10m-task-suite-artifacts",
    "hf_model_baselines": "https://huggingface.co/cy0307/ropedia-xperience-10m-task-baselines"
  },
  "do_not_infer": [
    "Do not infer cross-environment generalization from the single public sample episode.",
    "Do not treat the Qwen3-Omni readiness run as a 32-episode fine-tune.",
    "Do not treat feature-vector reconstruction as pixel-depth, mesh, NeRF, or Gaussian reconstruction.",
    "Do not assume raw Xperience-10M data is redistributed in this repo."
  ]
}
