Source code for minedojo.sim.sim

import uuid
from copy import deepcopy
from typing import Union, Optional, List, Dict, Tuple, Literal, Any

import cv2
import gym
import numpy as np
from lxml import etree

from .mc_meta import mc
from . import handlers
from .bridge import BridgeEnv
from .cmd_executor import CMDExecutor
from .config_sim_spec import SimSpec
from .inventory import InventoryItem, parse_inventory_item


class MineDojoSim(gym.Env):
    """An environment wrapper for the MineDojo simulation.

    Args:
        allow_mob_spawn: If ``True``, allow mobs (animals and hostiles) to spawn.
            Default: ``True``.
        allow_time_passage: Time flows if ``True``.
            Default: ``True``.
        break_speed_multiplier: Controls the speed of breaking blocks. A value
            larger than 1.0 accelerates the breaking.
            Default: ``1.0``.
        drawing_str: Draws shapes (e.g. spheres, cuboids) in the Minecraft world.
            Default: ``None``.
        event_level_control: If ``True``, the agent is able to perform high-level
            controls including place and equip. If ``False``, control is at the
            keyboard-mouse level.
            Default: ``True``.
        flat_world_seed_string: A string that specifies how the world layers should
            be created if ``generate_world_type`` is ``"flat"``. One can use the
            `tool <https://minecraft.tools/en/flat.php?biome=1&bloc_1_nb=1&bloc_1_id=2&bloc_2_nb=2&bloc_2_id=3%2F00&bloc_3_nb=1&bloc_3_id=7&village_size=1&village_distance=32&mineshaft_chance=1&stronghold_count=3&stronghold_distance=32&stronghold_spread=3&oceanmonument_spacing=32&oceanmonument_separation=5&biome_1_distance=32&valid=Create+the+Preset#seed>`_
            to generate such a string.
            Default: ``None``.
        generate_world_type: A string that specifies the type of the Minecraft
            world. One of ``"default"``, ``"flat"``, ``"from_file"``,
            ``"specified_biome"``.
            Default: ``"default"``.
        image_size: The size of image observations.
        initial_inventory: If not ``None``, specifies initial items in the agent's
            inventory. Use class ``InventoryItem`` to specify items.
            Default: ``None``.
        initial_weather: If not ``None``, specifies the initial weather.
            Can be one of ``"clear"``, ``"normal"``, ``"rain"``, ``"thunder"``.
            Default: ``None``.
        lidar_rays: Defines the directions and maximum distances of the lidar rays
            if ``use_lidar`` is ``True``. If supplied, should be a list of tuples
            of ``(pitch, yaw, distance)``. Pitch and yaw are in radians and
            relative to the agent's looking vector.
            Default: ``None``.
        raise_error_on_invalid_cmds: If ``True``, the cmd executor raises an error
            when a command is invalid. If ``False``, the executor simply skips the
            invalid command.
            Default: ``False``.
        regenerate_world_after_reset: If ``True``, the Minecraft world is
            re-generated when resetting.
            Default: ``False``.
        seed: The seed for an instance's internal random number generator.
            Default: ``None``.
        sim_name: Name of the simulation instance.
            Default: ``"MineDojoSim"``.
        spawn_in_village: If ``True``, the agent spawns in a village.
            Default: ``False``.
        specified_biome: If not ``None``, specifies the biome of the Minecraft
            world by a string or an integer.
            Default: ``None``.
        start_food: If not ``None``, specifies the initial food of the agent.
            Default: ``None``.
        start_health: If not ``None``, specifies the initial health of the agent.
            Default: ``None``.
        start_position: If not ``None``, specifies the agent's initial location
            and orientation. If provided, should be a dict with keys ``x``, ``y``,
            ``z``, ``yaw``, ``pitch``. ``yaw`` and ``pitch`` are in degrees.
            Default: ``None``.
        start_time: If not ``None``, specifies the time when the agent spawns.
            If supplied, should be an int between 0 and 24000.
            See `here <https://minecraft.fandom.com/wiki/Daylight_cycle>`_ for
            more information.
            Default: ``None``.
        use_depth: If ``True``, includes a depth map in observations.
            Default: ``False``.
        use_lidar: If ``True``, includes lidar in observations.
            Default: ``False``.
        use_voxel: If ``True``, includes voxels in observations.
            Default: ``False``.
        voxel_size: Defines the voxel's range in each axis if ``use_voxel`` is
            ``True``. If supplied, should be a dict with keys ``xmin``, ``xmax``,
            ``ymin``, ``ymax``, ``zmin``, ``zmax``. Each value specifies the voxel
            range relative to the agent.
            Default: ``None``.
        world_file_path: The path to the world file if ``generate_world_type`` is
            ``"from_file"``.
            Default: ``None``.
        world_seed: Seed for deterministic world generation if
            ``generate_world_type`` is ``"default"`` or ``"specified_biome"``.
            See `here <https://minecraft.fandom.com/wiki/Seed_(level_generation)>`_
            for more details.
            Default: ``None``.
    """

    def __init__(
        self,
        *,
        # ------ initial conditions ------
        initial_inventory: Optional[List[InventoryItem]] = None,
        start_position: Optional[Dict[str, Union[float, int]]] = None,
        start_health: Optional[float] = None,
        start_food: Optional[int] = None,
        start_time: Optional[int] = None,
        initial_weather: Optional[
            Literal["normal", "clear", "rain", "thunder"]
        ] = None,
        spawn_in_village: bool = False,
        drawing_str: Optional[str] = None,
        # ------ global conditions ------
        break_speed_multiplier: float = 1.0,
        allow_time_passage: bool = True,
        allow_mob_spawn: bool = True,
        # ------ world generation ------
        generate_world_type: Literal[
            "default", "from_file", "flat", "specified_biome"
        ] = "default",
        regenerate_world_after_reset: bool = False,
        world_seed: Optional[Union[str, int]] = None,
        world_file_path: Optional[str] = None,
        flat_world_seed_string: Optional[str] = None,
        specified_biome: Optional[Union[int, str]] = None,
        # ------ observation ------
        image_size: Union[int, Tuple[int, int]],
        use_voxel: bool = False,
        voxel_size: Optional[Dict[str, int]] = None,
        use_lidar: bool = False,
        lidar_rays: Optional[List[Tuple[float, float, float]]] = None,
        use_depth: bool = False,
        # ------ control ------
        event_level_control: bool = True,
        # ------ randomness ------
        seed: Optional[int] = None,
        # ------ misc ------
        sim_name: str = "MineDojoSim",
        raise_error_on_invalid_cmds: bool = False,
    ):
        self._sim_name = sim_name
        self._rng = np.random.default_rng(seed)

        if isinstance(image_size, int):
            image_size = (image_size, image_size)
        if use_voxel:
            voxel_size = voxel_size or dict(
                xmin=-1, ymin=-1, zmin=-1, xmax=1, ymax=1, zmax=1
            )
        if use_lidar:
            lidar_rays = lidar_rays or [(0.0, 0.0, 10.0)]
        assert not use_depth, "TODO: fix depth obs bug"
        start_health = start_health or 20.0
        start_food = start_food or 20
        assert generate_world_type in {
            "default",
            "from_file",
            "flat",
            "specified_biome",
        }, f"invalid generate world type {generate_world_type}"
        if generate_world_type == "default":
            world_seed = world_seed or ""
        elif generate_world_type == "from_file":
            assert (
                world_file_path is not None
            ), 'A world file path must be provided when `generate_world_type = "from_file"`'
        elif generate_world_type == "flat":
            flat_world_seed_string = flat_world_seed_string or ""
        elif generate_world_type == "specified_biome":
            assert (
                specified_biome is not None
            ), "must provide a biome when `generate_world_type = 'specified_biome'`!"
            world_seed = world_seed or ""
        else:
            raise ValueError(f"Unknown world generation type {generate_world_type}")
        if specified_biome is not None:
            if isinstance(specified_biome, str):
                assert (
                    specified_biome in mc.BIOMES_MAP
                ), f"Unknown biome name {specified_biome}"
                specified_biome = mc.BIOMES_MAP[specified_biome]
            elif isinstance(specified_biome, int):
                assert (
                    specified_biome in mc.BIOMES_MAP.values()
                ), f"Invalid biome id {specified_biome}"
            else:
                raise ValueError(f"invalid biome type {specified_biome}")

        # configure obs handlers
        obs_handlers = [
            handlers.POVObservation(image_size, False),
            handlers.TrueFlatInventoryObservation(),
            handlers.EquipmentObservation(),
            handlers.ObservationFromLifeStats(),
            handlers.ObservationFromCurrentLocation(),
            handlers.ObserveFromFullStats(),
            handlers.NearbyToolsObservation(),
            handlers.ObservationFromDamageSource(),
        ]
        if use_voxel:
            voxel_size = (
                (voxel_size["xmin"], voxel_size["xmax"]),
                (voxel_size["ymin"], voxel_size["ymax"]),
                (voxel_size["zmin"], voxel_size["zmax"]),
            )
            obs_handlers.append(handlers.VoxelObservation(voxel_size))
        if use_lidar:
            obs_handlers.append(handlers.RichLidarObservation(lidar_rays))

        # configure action handlers
        common_actions = [
            "forward",
            "back",
            "left",
            "right",
            "jump",
            "sneak",
            "sprint",
            "use",
            "attack",
            "drop",
        ]
        action_handlers = [
            handlers.CameraAction(),
            handlers.SmeltAction(
                ["none"] + mc.ALL_SMELTING_ITEMS, _other="none", _default="none"
            ),
            handlers.CraftAction(
                ["none"] + mc.ALL_PERSONAL_CRAFTING_ITEMS,
                _other="none",
                _default="none",
            ),
            handlers.CraftWithTableAction(
                ["none"] + mc.ALL_CRAFTING_TABLE_ITEMS, _other="none", _default="none"
            ),
        ]
        action_handlers.extend(
            [
                handlers.KeybasedCommandAction(k, mc.INVERSE_KEYMAP[k])
                for k in common_actions
            ]
        )
        if event_level_control:
            action_handlers.extend(
                [
                    handlers.EquipAction(
                        ["none"] + mc.ALL_ITEMS, _other="none", _default="none"
                    ),
                    handlers.PlaceBlock(
                        ["none"] + mc.ALL_ITEMS, _other="none", _default="none"
                    ),
                ]
            )
        else:
            action_handlers.append(handlers.SwapSlotAction())
            action_handlers.append(
                handlers.KeybasedCommandAction(
                    "pickItem", mc.INVERSE_KEYMAP["pickItem"]
                )
            )
            action_handlers.extend(
                [
                    handlers.KeybasedCommandAction(
                        f"hotbar.{i}", mc.INVERSE_KEYMAP[str(i)]
                    )
                    for i in range(1, 10)
                ]
            )

        # configure agent handlers
        agent_handlers = []

        # configure agent start handlers, e.g., initial inventory
        self.start_health, self.start_food = start_health, start_food
        agent_start_handlers = [
            handlers.LowLevelInputsAgentStart(),
            handlers.AgentStartBreakSpeedMultiplier(break_speed_multiplier),
            handlers.StartingHealthAgentStart(health=start_health),
            handlers.StartingFoodAgentStart(food=start_food),
        ]
        self.initial_inventory = initial_inventory
        if initial_inventory is not None:
            initial_inventory = [
                parse_inventory_item(item) for item in initial_inventory
            ]
            agent_start_handlers.append(
                handlers.InventoryAgentStart(
                    {
                        inventory_item[0]: inventory_item[1]
                        for inventory_item in initial_inventory
                    }
                )
            )
        self.start_position = start_position
        if start_position is not None:
            agent_start_handlers.append(
                handlers.AgentStartPlacement(
                    x=start_position["x"],
                    y=start_position["y"],
                    z=start_position["z"],
                    yaw=start_position["yaw"],
                    pitch=start_position["pitch"],
                )
            )

        # configure server initial conditions handlers
        self.start_time = start_time
        server_initial_conditions_handlers = [
            handlers.TimeInitialCondition(
                allow_passage_of_time=allow_time_passage, start_time=start_time
            ),
            handlers.SpawningInitialCondition(allow_mob_spawn),
        ]
        self.initial_weather = initial_weather
        if initial_weather is not None:
            server_initial_conditions_handlers.append(
                handlers.WeatherInitialCondition(initial_weather)
            )

        # configure world generator handlers
        world_generator_handlers = []
        if generate_world_type == "default":
            world_generator_handlers.append(
                handlers.DefaultWorldGenerator(
                    regenerate_world_after_reset, world_seed
                )
            )
        elif generate_world_type == "from_file":
            world_generator_handlers.append(
                handlers.FileWorldGenerator(
                    world_file_path,
                    destroy_after_use=False,
                )
            )
        elif generate_world_type == "flat":
            world_generator_handlers.append(
                handlers.FlatWorldGenerator(
                    force_reset=True, generatorString=flat_world_seed_string
                )
            )
        elif generate_world_type == "specified_biome":
            world_generator_handlers.append(
                handlers.BiomeGenerator(
                    specified_biome, force_reset=True, world_seed=world_seed
                )
            )
        else:
            raise ValueError()
        if drawing_str is not None:
            world_generator_handlers.append(handlers.DrawingDecorator(drawing_str))

        # configure server decorator handlers
        server_decorator_handlers = []
        if spawn_in_village:
            server_decorator_handlers.append(handlers.VillageSpawnDecorator())

        # configure server quit handlers
        server_quit_handlers = []

        self._sim_spec = SimSpec(
            sim_name=sim_name,
            agent_count=1,
            obs_handlers=obs_handlers,
            action_handlers=action_handlers,
            agent_handlers=agent_handlers,
            agent_start_handlers=agent_start_handlers,
            server_initial_conditions_handlers=server_initial_conditions_handlers,
            world_generator_handlers=world_generator_handlers,
            server_decorator_handlers=server_decorator_handlers,
            server_quit_handlers=server_quit_handlers,
            seed=self.new_seed,
        )
        self._bridge_env = BridgeEnv(is_fault_tolerant=True, seed=self.new_seed)
        self._prev_obs = None
        self._prev_action = None
        self._prev_info = None
        self._cmd_executor = CMDExecutor(self, raise_error_on_invalid_cmds)

    @property
    def observation_space(self):
        return self._sim_spec.observation_space

    @property
    def action_space(self):
        return self._sim_spec.action_space

    @property
    def new_seed(self):
        return self._rng.integers(low=0, high=2**31 - 1).item()
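    # Construction sketch (illustrative, not from the original source): a
    # minimal instantiation assuming a working MineDojo install. All argument
    # values below are arbitrary examples.
    #
    #   sim = MineDojoSim(
    #       image_size=(160, 256),
    #       generate_world_type="flat",
    #       use_voxel=True,
    #       voxel_size=dict(xmin=-2, ymin=-2, zmin=-2, xmax=2, ymax=2, zmax=2),
    #       start_position=dict(x=0.5, y=64.0, z=0.5, yaw=0.0, pitch=0.0),
    #   )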
    def seed(self, seed: Optional[int] = None):
        """Sets the seed for this env's random number generator.

        Args:
            seed: The seed for the random number generator.
        """
        self._rng = np.random.default_rng(seed)
    def reset(self):
        """Resets the environment to an initial state and returns an initial observation.

        Return:
            The agent's initial observation.
        """
        episode_id = str(uuid.uuid4())
        xml = etree.fromstring(self._sim_spec.to_xml(episode_id))
        raw_obs = self._bridge_env.reset(episode_id, [xml])[0]
        obs, info = self._process_raw_obs(raw_obs)
        self._prev_obs, self._prev_info = deepcopy(obs), deepcopy(info)
        return obs
    def step(self, action: dict):
        """Run one timestep of the environment's dynamics.
        Accepts an action and returns next_obs, reward, done, info.

        Args:
            action: The action of the agent in the current step.

        Return:
            A tuple (obs, reward, done, info)

            - ``dict`` - Agent's next observation.
            - ``float`` - Amount of reward returned after executing the previous action.
            - ``bool`` - Whether the episode has ended.
            - ``dict`` - Contains auxiliary diagnostic information (helpful for debugging, and sometimes learning).
        """
        self._prev_action = deepcopy(action)
        action_xml = self._action_obj_to_xml(action)
        step_tuple = self._bridge_env.step([action_xml])
        step_success, raw_obs = step_tuple.step_success, step_tuple.raw_obs
        if not step_success:
            # when step failed, return prev obs
            return self._prev_obs, 0, True, self._prev_info
        else:
            obs, info = self._process_raw_obs(raw_obs[0])
            self._prev_obs, self._prev_info = deepcopy(obs), deepcopy(info)
            return obs, 0, self.is_terminated, info
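    # Interaction sketch (illustrative) combining `reset` and `step`. It
    # assumes the composed action space exposes a `no_op()` helper, as
    # MineRL-style dict action spaces typically do; otherwise substitute any
    # valid action dict.
    #
    #   obs = sim.reset()
    #   for _ in range(100):
    #       action = sim.action_space.no_op()
    #       obs, reward, done, info = sim.step(action)
    #       if done:
    #           obs = sim.reset()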
    def execute_cmd(self, cmd: str, action: Optional[dict] = None):
        """Execute a given string command.

        Args:
            cmd: The string command accepted by the Minecraft client.
            action: An action that will be simultaneously executed with the command.

        Return:
            A tuple (obs, reward, done, info)

            - ``dict`` - Agent's observation of the current environment.
            - ``float`` - Amount of reward returned after previous action.
            - ``bool`` - Whether the episode has ended.
            - ``dict`` - Contains auxiliary diagnostic information (helpful for debugging, and sometimes learning).
        """
        return self._cmd_executor.execute_cmd(cmd, action)
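    # Sketch (illustrative): issue a raw command. The exact command syntax is
    # whatever the Minecraft client accepts; a slash-prefixed chat command is
    # assumed here.
    #
    #   obs, reward, done, info = sim.execute_cmd("/time set day")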
    def spawn_mobs(
        self,
        mobs: Union[str, List[str]],
        rel_positions: Union[np.ndarray, list],
        action: Optional[dict] = None,
    ):
        """Spawn mobs in the world.

        Args:
            mobs: The names of the mobs to spawn.
            rel_positions: The mobs' positions relative to the agent.
            action: An action that will be simultaneously executed with the spawning.

        Return:
            A tuple (obs, reward, done, info)

            - ``dict`` - Agent's observation of the current environment.
            - ``float`` - Amount of reward returned after previous action.
            - ``bool`` - Whether the episode has ended.
            - ``dict`` - Contains auxiliary diagnostic information (helpful for debugging, and sometimes learning).
        """
        return self._cmd_executor.spawn_mobs(mobs, rel_positions, action)
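    # Sketch (illustrative): spawn two sheep near the agent. Mob names and the
    # relative-position format follow the docstring above; the values are
    # arbitrary examples.
    #
    #   obs, reward, done, info = sim.spawn_mobs(
    #       mobs=["sheep", "sheep"],
    #       rel_positions=[[2, 0, 2], [-2, 0, 2]],
    #   )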
    def set_block(
        self,
        blocks: Union[str, List[str]],
        rel_positions: Union[np.ndarray, list],
        action: Optional[dict] = None,
    ):
        """Set blocks in the world.

        Args:
            blocks: The names of the blocks to set.
            rel_positions: The blocks' positions relative to the agent.
            action: An action that will be simultaneously executed with the setting.

        Return:
            A tuple (obs, reward, done, info)

            - ``dict`` - Agent's observation of the current environment.
            - ``float`` - Amount of reward returned after previous action.
            - ``bool`` - Whether the episode has ended.
            - ``dict`` - Contains auxiliary diagnostic information (helpful for debugging, and sometimes learning).
        """
        return self._cmd_executor.set_block(blocks, rel_positions, action)
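    # Sketch (illustrative): place a diamond block two blocks in front of the
    # agent. The block name and offset are arbitrary examples.
    #
    #   obs, reward, done, info = sim.set_block(
    #       blocks=["diamond_block"],
    #       rel_positions=[[0, 0, 2]],
    #   )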
    def clear_inventory(self, action: Optional[dict] = None):
        """Remove all items in the agent's inventory.

        Args:
            action: An action that will be simultaneously executed.

        Return:
            A tuple (obs, reward, done, info)

            - ``dict`` - Agent's observation of the current environment.
            - ``float`` - Amount of reward returned after previous action.
            - ``bool`` - Whether the episode has ended.
            - ``dict`` - Contains auxiliary diagnostic information (helpful for debugging, and sometimes learning).
        """
        return self._cmd_executor.clear_inventory(action)
    def set_inventory(
        self, inventory_list: List[InventoryItem], action: Optional[dict] = None
    ):
        """Set items in the agent's inventory.

        Args:
            inventory_list: List of ``InventoryItem`` to change the inventory status.
            action: An action that will be simultaneously executed.

        Return:
            A tuple (obs, reward, done, info)

            - ``dict`` - Agent's observation of the current environment.
            - ``float`` - Amount of reward returned after previous action.
            - ``bool`` - Whether the episode has ended.
            - ``dict`` - Contains auxiliary diagnostic information (helpful for debugging, and sometimes learning).
        """
        return self._cmd_executor.set_inventory(inventory_list, action)
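    # Sketch (illustrative; see also `clear_inventory` above): the exact
    # ``InventoryItem`` fields are defined in `minedojo.sim.inventory`; the
    # `slot`, `name`, and `quantity` keywords below are assumptions.
    #
    #   obs, reward, done, info = sim.set_inventory(
    #       [InventoryItem(slot=0, name="diamond_pickaxe", quantity=1)]
    #   )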
    def teleport_agent(self, x, y, z, yaw, pitch, action: Optional[dict] = None):
        """Teleport the agent to a given position.

        Args:
            x: x coordinate of the destination.
            y: y coordinate of the destination.
            z: z coordinate of the destination.
            yaw: yaw of the targeted orientation.
            pitch: pitch of the targeted orientation.
            action: An action that will be simultaneously executed with the teleporting.

        Return:
            A tuple (obs, reward, done, info)

            - ``dict`` - Agent's observation of the current environment.
            - ``float`` - Amount of reward returned after previous action.
            - ``bool`` - Whether the episode has ended.
            - ``dict`` - Contains auxiliary diagnostic information (helpful for debugging, and sometimes learning).
        """
        return self._cmd_executor.teleport_agent(x, y, z, yaw, pitch, action)
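    # Sketch (illustrative): teleport to an absolute position. Coordinates and
    # angles are arbitrary examples; yaw and pitch are in degrees, matching the
    # `start_position` convention documented in `__init__`.
    #
    #   obs, reward, done, info = sim.teleport_agent(
    #       x=100.5, y=64.0, z=-20.5, yaw=-90.0, pitch=0.0
    #   )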
    def kill_agent(self, action: Optional[dict] = None):
        """Kill the agent.

        Args:
            action: An action that will be simultaneously executed.

        Return:
            A tuple (obs, reward, done, info)

            - ``dict`` - Agent's observation of the current environment.
            - ``float`` - Amount of reward returned after previous action.
            - ``bool`` - Whether the episode has ended.
            - ``dict`` - Contains auxiliary diagnostic information (helpful for debugging, and sometimes learning).
        """
        return self._cmd_executor.kill_agent(action)
    def set_time(self, time: int, action: Optional[dict] = None):
        """Set the time of the world.

        Args:
            time: The target time.
            action: An action that will be simultaneously executed.

        Return:
            A tuple (obs, reward, done, info)

            - ``dict`` - Agent's observation of the current environment.
            - ``float`` - Amount of reward returned after previous action.
            - ``bool`` - Whether the episode has ended.
            - ``dict`` - Contains auxiliary diagnostic information (helpful for debugging, and sometimes learning).
        """
        return self._cmd_executor.set_time(time, action)
    def set_weather(self, weather: str, action: Optional[dict] = None):
        """Set the weather of the world.

        Args:
            weather: The target weather.
            action: An action that will be simultaneously executed.

        Return:
            A tuple (obs, reward, done, info)

            - ``dict`` - Agent's observation of the current environment.
            - ``float`` - Amount of reward returned after previous action.
            - ``bool`` - Whether the episode has ended.
            - ``dict`` - Contains auxiliary diagnostic information (helpful for debugging, and sometimes learning).
        """
        return self._cmd_executor.set_weather(weather, action)
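    # Sketch (illustrative) for the world-state setters above. The time value
    # follows the 0-24000 daylight-cycle convention documented in `__init__`
    # (18000 is midnight), and the weather string is one of the values listed
    # for `initial_weather`.
    #
    #   obs, reward, done, info = sim.set_time(18000)
    #   obs, reward, done, info = sim.set_weather("thunder")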
    def random_teleport(self, max_range: int, action: Optional[dict] = None):
        """Teleport the agent randomly.

        Args:
            max_range: The maximum distance on each horizontal axis from the center
                of the area to spread targets (thus, the area is square, not circular).
            action: An action that will be simultaneously executed.

        Return:
            A tuple (obs, reward, done, info)

            - ``dict`` - Agent's observation of the current environment.
            - ``float`` - Amount of reward returned after previous action.
            - ``bool`` - Whether the episode has ended.
            - ``dict`` - Contains auxiliary diagnostic information (helpful for debugging, and sometimes learning).
        """
        return self._cmd_executor.random_teleport(max_range, action)
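    # Sketch (illustrative): scatter the agent within at most 100 blocks of
    # the area's center along each horizontal axis, i.e. a 200 x 200 square.
    #
    #   obs, reward, done, info = sim.random_teleport(100)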
    def close(self):
        """Environments will automatically close() themselves when garbage collected or when the program exits."""
        self._bridge_env.close()
    def render(self, mode: str = "human"):
        """Renders the environment.

        Args:
            mode: The mode to render with.
        """
        img = self._prev_obs["rgb"]
        # convert from (C, H, W) to (H, W, C) and from RGB to BGR for OpenCV
        img = img.transpose((1, 2, 0))
        img = img[:, :, ::-1]
        cv2.imshow(f"{self._sim_name}", img)
        cv2.waitKey(1)
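    # Sketch (illustrative): a simple viewing loop. `render` displays the most
    # recent observation through an OpenCV window, so a display is required;
    # `no_op()` is assumed as in the `step` sketch above.
    #
    #   obs = sim.reset()
    #   for _ in range(100):
    #       obs, reward, done, info = sim.step(sim.action_space.no_op())
    #       sim.render()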
    @property
    def prev_obs(self):
        return self._prev_obs

    @property
    def prev_info(self):
        return self._prev_info

    @property
    def prev_action(self):
        return self._prev_action

    @property
    def is_terminated(self):
        return self._bridge_env.is_terminated

    def _process_raw_obs(self, raw_obs: dict):
        # everything except the pixel observation doubles as the info dict
        info = deepcopy(raw_obs)
        if "pov" in info:
            info.pop("pov")
        obs_dict = {
            h.to_string(): h.from_hero(raw_obs) for h in self._sim_spec.observables
        }
        return obs_dict, info

    def _action_obj_to_xml(self, action):
        # an optional "chat" entry becomes a chat command; all other entries
        # are serialized by their corresponding action handlers
        parsed_action = [f'chat {action["chat"]}'] if "chat" in action else []
        parsed_action.extend(
            [h.to_hero(action[h.to_string()]) for h in self._sim_spec.actionables]
        )
        return "\n".join(parsed_action)