{
  "date": "2026-06-24",
  "base_commit": "76e1da24e73f314808cadfe7e6ef36755a7a181f",
  "working_tree_changes_required": true,
  "direct_render": false,
  "official_baseline_execution": false,
  "task_render_accepted": true,
  "official_baseline_evaluable": false,
  "summary": "P1 moved the LabUtopia POC from black or cluttered frames to three evaluator readback-visible task frames. Static USD placement, nested DryingBox handle discovery, task-level visibility isolation, and task-specific cameras are now sufficient for the three Franka POC task render gates. The latest formal diagnostics for pick, place, and open_door all report render_validation.passed=true and task_render_accepted=true. The official Lift2 baseline remains not evaluable because Lift2 composite assets and the official runner have not been validated.",
  "environment": {
    "conda_env": "/cpfs/shared/simulation/zhuzihou/dev/conda-managed/envs/embodied-eval-os-sim-isaacsim41-genmanip-py310",
    "config": "ebench/labutopia_lab_poc/franka_poc",
    "asset_root": "/cpfs/shared/simulation/zhuzihou/dev/_datasets/EBench-Assets-Overlay/labutopia_level1_poc/assets",
    "base_camera_config": "configs/cameras/labutopia_franka_poc.yml",
    "task_camera_configs": {
      "level1_pick": "configs/cameras/labutopia_franka_poc_pick.yml",
      "level1_place": "configs/cameras/labutopia_franka_poc_place.yml",
      "level1_open_door": "configs/cameras/labutopia_franka_poc_open_door.yml"
    },
    "runtime_isolation": {
      "ports_used_for_final_formal_runs": [18091, 18092, 18093],
      "eos_port_8087_touched": false
    }
  },
  "historical_failed_images": {
    "purpose": "Kept in the weekly HTML report as before/after evidence for PM readers.",
    "level1_pick": {
      "path": "docs/records/evidence/2026-06-22-labutopia-ebench-weekly-report/assets/labutopia-franka-level1-pick.jpg",
      "problem": "Target bottle was not identifiable, so the image could not explain the pick task."
    },
    "level1_place": {
      "path": "docs/records/evidence/2026-06-22-labutopia-ebench-weekly-report/assets/labutopia-franka-level1-place.jpg",
      "problem": "The beaker and target platform relation was missing, so the image could not explain the place task."
    },
    "level1_open_door": {
      "path": "docs/records/evidence/2026-06-22-labutopia-ebench-weekly-report/assets/labutopia-franka-level1-open-door.jpg",
      "problem": "The frame only showed a dark box corner; door, hinge, handle, and action point were not clear."
    }
  },
  "static_asset_layout": {
    "scene_opens": true,
    "objects_normalized_to_franka_workspace": true,
    "drying_box_handle": {
      "wrapper_prim_path": "/World/labutopia_level1_poc/obj_obj_DryingBox_01/handle",
      "top_level_duplicate_removed": true,
      "runtime_handle_scale": [0.045, 0.075, 0.25],
      "resolution": "The old oversized orange-panel handle was replaced with a slimmer high-contrast handle that remains a nested DryingBox part."
    }
  },
  "task_level_visibility_isolation": {
    "level1_pick": {
      "visible_task_objects": ["obj_conical_bottle02"],
      "hidden_non_task_objects": ["obj_beaker2", "obj_target_plat", "obj_DryingBox_01"]
    },
    "level1_place": {
      "visible_task_objects": ["obj_beaker2", "obj_target_plat"],
      "hidden_non_task_objects": ["obj_conical_bottle02", "obj_DryingBox_01"]
    },
    "level1_open_door": {
      "visible_task_objects": ["obj_DryingBox_01", "obj_DryingBox_01_handle"],
      "hidden_non_task_objects": ["obj_conical_bottle02", "obj_beaker2", "obj_target_plat"]
    }
  },
  "diagnostics": {
    "level1_pick": {
      "run_id": "labutopia_p1_gate_pick_formal_20260624_0001",
      "diagnostics_json": "saved/diagnostics/labutopia_p1_gate_pick_formal_20260624_0001/diagnostics.json",
      "boundary_classification": "readback_visible",
      "readback_frame": "saved/diagnostics/labutopia_p1_gate_pick_formal_20260624_0001/readback_after_get_eval_camera_data/camera2/00000.png",
      "report_image": "docs/records/evidence/2026-06-22-labutopia-ebench-weekly-report/assets/labutopia-franka-level1-pick-eval-readback-p1.png",
      "sha256": "3bc822dec8204b1aa278b844e101076dede956b50fd1019679b3770dada49c81",
      "channel_min": [61.0, 144.0, 147.0],
      "channel_max": [228.0, 228.0, 229.0],
      "channel_mean": [176.342, 177.134, 177.471],
      "nonzero_pixels": 262144,
      "unique_rgb_colors": 1210,
      "render_validation": {
        "passed": true,
        "required_objects": {
          "obj_conical_bottle02": {
            "bbox": [236, 222, 275, 299],
            "bbox_area_fraction": 0.01190185546875,
            "width_px": 40,
            "height_px": 78,
            "passed": true
          }
        }
      },
      "claim_boundary": {
        "task_render_accepted": true,
        "official_baseline_evaluable": false,
        "baseline_blockers": ["official_baseline_not_validated"]
      },
      "visual_review": "PASS",
      "visual_reason": "Task-level hiding leaves the target bottle clearly visible on the table."
    },
    "level1_place": {
      "run_id": "labutopia_p1_gate_place_formal_20260624_0001",
      "diagnostics_json": "saved/diagnostics/labutopia_p1_gate_place_formal_20260624_0001/diagnostics.json",
      "boundary_classification": "readback_visible",
      "readback_frame": "saved/diagnostics/labutopia_p1_gate_place_formal_20260624_0001/readback_after_get_eval_camera_data/camera2/00000.png",
      "report_image": "docs/records/evidence/2026-06-22-labutopia-ebench-weekly-report/assets/labutopia-franka-level1-place-eval-readback-p1.png",
      "sha256": "8e80231ab01ff998ec516e74a7187eca6ac028a8a3e887016d8b1d31ef1aa419",
      "channel_min": [57.0, 143.0, 116.0],
      "channel_max": [228.0, 228.0, 228.0],
      "channel_mean": [169.452, 170.169, 168.271],
      "nonzero_pixels": 262144,
      "unique_rgb_colors": 1334,
      "render_validation": {
        "passed": true,
        "required_objects": {
          "obj_beaker2": {
            "bbox": [251, 208, 289, 263],
            "bbox_area_fraction": 0.008331298828125,
            "width_px": 39,
            "height_px": 56,
            "passed": true
          },
          "obj_target_plat": {
            "bbox": [215, 398, 296, 459],
            "bbox_area_fraction": 0.0193939208984375,
            "width_px": 82,
            "height_px": 62,
            "passed": true
          }
        }
      },
      "claim_boundary": {
        "task_render_accepted": true,
        "official_baseline_evaluable": false,
        "baseline_blockers": ["official_baseline_not_validated"]
      },
      "visual_review": "PASS",
      "visual_reason": "The beaker and yellow target platform are visible in the same evaluator frame."
    },
    "level1_open_door": {
      "run_id": "labutopia_p1_gate_open_door_formal_20260624_0002",
      "diagnostics_json": "saved/diagnostics/labutopia_p1_gate_open_door_formal_20260624_0002/diagnostics.json",
      "boundary_classification": "readback_visible",
      "readback_frame": "saved/diagnostics/labutopia_p1_gate_open_door_formal_20260624_0002/readback_after_get_eval_camera_data/camera2/00000.png",
      "report_image": "docs/records/evidence/2026-06-22-labutopia-ebench-weekly-report/assets/labutopia-franka-level1-open-door-eval-readback-p1.png",
      "sha256": "da670293ef61e0136b3522a07c9f2421a0ec73bca79ac0304eb1adf818644502",
      "channel_min": [27.0, 28.0, 30.0],
      "channel_max": [228.0, 228.0, 228.0],
      "channel_mean": [162.518, 162.711, 163.53],
      "nonzero_pixels": 262144,
      "unique_rgb_colors": 4088,
      "render_validation": {
        "passed": true,
        "required_objects": {
          "obj_DryingBox_01": {
            "bbox": [85, 94, 426, 246],
            "bbox_area_fraction": 0.19960784912109375,
            "width_px": 342,
            "height_px": 153,
            "passed": true
          },
          "obj_DryingBox_01_handle": {
            "bbox": [246, 146, 273, 230],
            "bbox_area_fraction": 0.0090789794921875,
            "width_px": 28,
            "height_px": 85,
            "passed": true
          }
        }
      },
      "claim_boundary": {
        "task_render_accepted": true,
        "official_baseline_evaluable": false,
        "baseline_blockers": ["official_baseline_not_validated"]
      },
      "runtime_physics": {
        "articulation_joint_positions": [0.0],
        "expected_articulation_joint_positions": [0.0],
        "dof_names": ["RevoluteJoint"],
        "runtime_physics_stable": true
      },
      "visual_review": "PASS",
      "visual_reason": "The final thin-handle retake shows the DryingBox frame, door panel, and orange handle/action point clearly enough for the task render gate."
    }
  },
  "open_door_root_cause_review": {
    "problem": "The original DryingBox runtime asset could expose unstable articulation state and the early PM image did not show a useful door/handle target. An intermediate retake made the handle look like a broad orange panel.",
    "fix_sequence": [
      "Use a sanitized DryingBox surrogate with fixed base and one aligned revolute door joint.",
      "Replay the expected closed joint target so reset starts at [0.0].",
      "Keep the handle as a nested DryingBox child, move it to the non-hinge side, delete duplicate orange marker blocks, and reduce the handle scale to [0.045, 0.075, 0.25].",
      "Use the formal front camera for the final task render gate."
    ],
    "latest_resolution": {
      "runtime_joint_stable": true,
      "runtime_joint_target_matches": true,
      "single_physical_handle_visible": true,
      "duplicate_marker_removed": true,
      "task_render_accepted": true,
      "remaining_baseline_blocker": "official_baseline_not_validated"
    }
  },
  "browser_visual_review": {
    "tool": "Playwright 1.61 via npx with cached Chromium",
    "preview_url": "http://127.0.0.1:18080/records/evidence/2026-06-22-labutopia-ebench-weekly-report/index.html",
    "desktop_screenshot": "/tmp/labutopia_weekly_p1_gate_review_20260624/desktop_full_report.png",
    "tablet_screenshot": "/tmp/labutopia_weekly_p1_gate_review_20260624/tablet_full_report.png",
    "mobile_screenshot": "/tmp/labutopia_weekly_p1_gate_review_20260624/mobile_full_report.png",
    "desktop_audit_json": "/tmp/labutopia_weekly_p1_gate_review_20260624/desktop_audit.json",
    "tablet_audit_json": "/tmp/labutopia_weekly_p1_gate_review_20260624/tablet_audit.json",
    "mobile_audit_json": "/tmp/labutopia_weekly_p1_gate_review_20260624/mobile_audit.json",
    "result": "PASS_FOR_REPORT_DISPLAY",
    "notes": "Desktop, tablet, and mobile full-page audits passed: six report images loaded, required old/new/render-gate/baseline-boundary text was present, no failed requests or horizontal page overflow were detected, and stale render_validation_not_passed wording was absent from the HTML report."
  },
  "claim_boundary": {
    "allowed": [
      "All three Franka POC tasks now produce non-black evaluator camera2 readback frames.",
      "Static USD readback shows task objects and the nested DryingBox handle in plausible workspace coordinates.",
      "The old top-level duplicated handle payload is removed; the handle is represented as part of the DryingBox assembly.",
      "Task-level hiding makes pick and place readable as PM-facing task frames.",
      "The latest open_door runtime diagnostic has stable DryingBox joint positions, matches the expected closed target [0.0], only exposes RevoluteJoint, and shows the DryingBox frame, door panel, and thin orange handle in the same eval readback frame.",
      "The three Franka POC task render gates now pass with task_render_accepted=true."
    ],
    "blocked": [
      "Official Lift2 baseline evaluability.",
      "Any claim that task render acceptance equals policy success or score improvement.",
      "Any claim that the official Lift2 runner has been executed.",
      "Any claim that the current report display QA equals official baseline validation."
    ]
  },
  "next_steps": [
    "Build and validate the Lift2 composite asset root: LabUtopia scene overlay plus default robot_usds/lift2 plus default miscs/curobo.",
    "Locate and hash the official EBench/OpenPI/Lift2 runner entrypoint before executing any official-style loop.",
    "Run a Lift2 dry smoke in an isolated port and keep official_baseline_execution=false until the runner and assets are proven."
  ]
}
