Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

allow loading either nsel or natoms atomic tensor data #3394

Merged
merged 5 commits into from
Mar 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions deepmd/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def add_data_requirement(
repeat: int = 1,
default: float = 0.0,
dtype: Optional[np.dtype] = None,
output_natoms_for_type_sel: bool = False,
):
"""Specify data requirements for training.

Expand All @@ -103,6 +104,8 @@ def add_data_requirement(
default value of data
dtype : np.dtype, optional
the dtype of data, overwrites `high_prec` if provided
output_natoms_for_type_sel : bool, optional
if True and type_sel is given (not None), the atomic dimension will be natoms instead of nsel
"""
data_requirement[key] = {
"ndof": ndof,
Expand All @@ -113,6 +116,7 @@ def add_data_requirement(
"repeat": repeat,
"default": default,
"dtype": dtype,
"output_natoms_for_type_sel": output_natoms_for_type_sel,
}


Expand Down
1 change: 1 addition & 0 deletions deepmd/pt/utils/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,4 +61,5 @@ def add_data_requirement(self, data_requirement: List[DataRequirementItem]):
repeat=data_item["repeat"],
default=data_item["default"],
dtype=data_item["dtype"],
output_natoms_for_type_sel=data_item["output_natoms_for_type_sel"],
)
60 changes: 56 additions & 4 deletions deepmd/utils/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@
repeat: int = 1,
default: float = 0.0,
dtype: Optional[np.dtype] = None,
output_natoms_for_type_sel: bool = False,
):
"""Add a data item that to be loaded.

Expand All @@ -173,6 +174,8 @@
default value of data
dtype : np.dtype, optional
the dtype of data, overwrites `high_prec` if provided
output_natoms_for_type_sel : bool, optional
if True and type_sel is given (not None), the atomic dimension will be natoms instead of nsel
"""
self.data_dict[key] = {
"ndof": ndof,
Expand All @@ -184,6 +187,7 @@
"reduce": None,
"default": default,
"dtype": dtype,
"output_natoms_for_type_sel": output_natoms_for_type_sel,
}
return self

Expand Down Expand Up @@ -523,6 +527,9 @@
repeat=self.data_dict[kk]["repeat"],
default=self.data_dict[kk]["default"],
dtype=self.data_dict[kk]["dtype"],
output_natoms_for_type_sel=self.data_dict[kk][
"output_natoms_for_type_sel"
],
)
for kk in self.data_dict.keys():
if self.data_dict[kk]["reduce"] is not None:
Expand Down Expand Up @@ -589,19 +596,25 @@
type_sel=None,
default: float = 0.0,
dtype: Optional[np.dtype] = None,
output_natoms_for_type_sel: bool = False,
):
if atomic:
natoms = self.natoms
idx_map = self.idx_map
# if type_sel, then revise natoms and idx_map
if type_sel is not None:
natoms = 0
natoms_sel = 0
for jj in type_sel:
natoms += np.sum(self.atom_type == jj)
idx_map = self._idx_map_sel(self.atom_type, type_sel)
natoms_sel += np.sum(self.atom_type == jj)
idx_map_sel = self._idx_map_sel(self.atom_type, type_sel)
else:
natoms_sel = natoms
idx_map_sel = idx_map
ndof = ndof_ * natoms
else:
ndof = ndof_
natoms_sel = 0
idx_map_sel = None

Check notice

Code scanning / CodeQL

Unused local variable Note

Variable idx_map_sel is not used.
if dtype is not None:
pass
elif high_prec:
Expand All @@ -613,6 +626,38 @@
data = path.load_numpy().astype(dtype)
try: # YWolfeee: deal with data shape error
if atomic:
if type_sel is not None:
# check the data shape is nsel or natoms
if data.size == nframes * natoms_sel * ndof_:
if output_natoms_for_type_sel:
tmp = np.zeros(
[nframes, natoms, ndof_], dtype=data.dtype
)
sel_mask = np.isin(self.atom_type, type_sel)
tmp[:, sel_mask] = data.reshape(
[nframes, natoms_sel, ndof_]
)
data = tmp
else:
natoms = natoms_sel
idx_map = idx_map_sel
ndof = ndof_ * natoms
elif data.size == nframes * natoms * ndof_:
if output_natoms_for_type_sel:
pass
else:
sel_mask = np.isin(self.atom_type, type_sel)
data = data[:, sel_mask]
natoms = natoms_sel
idx_map = idx_map_sel
ndof = ndof_ * natoms
else:
raise ValueError(

Check warning on line 655 in deepmd/utils/data.py

View check run for this annotation

Codecov / codecov/patch

deepmd/utils/data.py#L655

Added line #L655 was not covered by tests
f"The shape of the data {key} in {set_name}"
f"is {data.shape}, which doesn't match either"
f"({nframes}, {natoms_sel}, {ndof_}) or"
f"({nframes}, {natoms}, {ndof_})"
)
data = data.reshape([nframes, natoms, -1])
data = data[:, idx_map, :]
data = data.reshape([nframes, -1])
Expand All @@ -621,13 +666,15 @@
explanation = "This error may occur when your label mismatch it's name, i.e. you might store global tensor in `atomic_tensor.npy` or atomic tensor in `tensor.npy`."
log.error(str(err_message))
log.error(explanation)
raise ValueError(str(err_message) + ". " + explanation)
raise ValueError(str(err_message) + ". " + explanation) from err_message

Check warning on line 669 in deepmd/utils/data.py

View check run for this annotation

Codecov / codecov/patch

deepmd/utils/data.py#L669

Added line #L669 was not covered by tests
if repeat != 1:
data = np.repeat(data, repeat).reshape([nframes, -1])
return np.float32(1.0), data
elif must:
raise RuntimeError("%s not found!" % path)
else:
if type_sel is not None and not output_natoms_for_type_sel:
ndof = ndof_ * natoms_sel
data = np.full([nframes, ndof], default, dtype=dtype)
if repeat != 1:
data = np.repeat(data, repeat).reshape([nframes, -1])
Expand Down Expand Up @@ -694,6 +741,8 @@
default value of data
dtype : np.dtype, optional
the dtype of data, overwrites `high_prec` if provided
output_natoms_for_type_sel : bool, optional
if True and type_sel is given (not None), the atomic dimension will be natoms instead of nsel
"""

def __init__(
Expand All @@ -707,6 +756,7 @@
repeat: int = 1,
default: float = 0.0,
dtype: Optional[np.dtype] = None,
output_natoms_for_type_sel: bool = False,
) -> None:
self.key = key
self.ndof = ndof
Expand All @@ -717,6 +767,7 @@
self.repeat = repeat
self.default = default
self.dtype = dtype
self.output_natoms_for_type_sel = output_natoms_for_type_sel
self.dict = self.to_dict()

def to_dict(self) -> dict:
Expand All @@ -730,6 +781,7 @@
"repeat": self.repeat,
"default": self.default,
"dtype": self.dtype,
"output_natoms_for_type_sel": self.output_natoms_for_type_sel,
}

def __getitem__(self, key: str):
Expand Down
12 changes: 12 additions & 0 deletions deepmd/utils/data_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,10 @@ def add_dict(self, adict: dict) -> None:
type_sel=adict[kk]["type_sel"],
repeat=adict[kk]["repeat"],
default=adict[kk]["default"],
dtype=adict[kk].get("dtype"),
output_natoms_for_type_sel=adict[kk].get(
"output_natoms_for_type_sel", False
),
)

def add(
Expand All @@ -305,6 +309,8 @@ def add(
type_sel: Optional[List[int]] = None,
repeat: int = 1,
default: float = 0.0,
dtype: Optional[np.dtype] = None,
output_natoms_for_type_sel: bool = False,
):
"""Add a data item that to be loaded.

Expand All @@ -329,6 +335,10 @@ def add(
The data will be repeated `repeat` times.
default, default=0.
Default value of data
dtype
The dtype of data, overwrites `high_prec` if provided
output_natoms_for_type_sel : bool
If True and type_sel is given (not None), the atomic dimension will be natoms instead of nsel
"""
for ii in self.data_systems:
ii.add(
Expand All @@ -340,6 +350,8 @@ def add(
repeat=repeat,
type_sel=type_sel,
default=default,
dtype=dtype,
output_natoms_for_type_sel=output_natoms_for_type_sel,
)

def reduce(self, key_out, key_in):
Expand Down
1 change: 1 addition & 0 deletions source/tests/tf/test_data_requirement.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ def test_add(self):
self.assertEqual(data_requirement["test"]["high_prec"], False)
self.assertEqual(data_requirement["test"]["repeat"], 1)
self.assertEqual(data_requirement["test"]["default"], 0.0)
self.assertEqual(data_requirement["test"]["output_natoms_for_type_sel"], False)
63 changes: 63 additions & 0 deletions source/tests/tf/test_deepmd_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ def setUp(self):
os.makedirs(os.path.join(self.data_name, "set.foo"), exist_ok=True)
os.makedirs(os.path.join(self.data_name, "set.bar"), exist_ok=True)
os.makedirs(os.path.join(self.data_name, "set.tar"), exist_ok=True)
os.makedirs(os.path.join(self.data_name, "set.foo"), exist_ok=True)
np.savetxt(os.path.join(self.data_name, "type.raw"), np.array([1, 0]), fmt="%d")
np.savetxt(
os.path.join(self.data_name, "type_map.raw"),
Expand Down Expand Up @@ -141,6 +142,16 @@ def setUp(self):
np.save(path, self.test_frame_bar)
# t n
self.test_null = np.zeros([self.nframes, 2 * self.natoms])
# tensor shape
path = os.path.join(self.data_name, "set.foo", "tensor_natoms.npy")
self.tensor_natoms = np.random.default_rng().random(
[self.nframes, self.natoms, 6]
)
self.tensor_natoms[:, 0, :] = 0
np.save(path, self.tensor_natoms)
path = os.path.join(self.data_name, "set.foo", "tensor_nsel.npy")
self.tensor_nsel = self.tensor_natoms[:, 1, :]
np.save(path, self.tensor_nsel)

def tearDown(self):
shutil.rmtree(self.data_name)
Expand Down Expand Up @@ -292,6 +303,58 @@ def test_get_nbatch(self):
nb = dd.get_numb_batch(2, 0)
self.assertEqual(nb, 2)

def test_get_tensor(self):
    """Check atomic tensor loading with ``type_sel`` for both file layouts.

    The fixture stores the same tensor twice in ``set.foo``: once per atom
    (``tensor_natoms``) and once for a single atom only (``tensor_nsel``).
    Both files are loaded with ``output_natoms_for_type_sel`` switched on
    and then off, and the loaded arrays are compared against each other.
    """
    set_dir = os.path.join(self.data_name, "set.foo")
    keys = ("tensor_nsel", "tensor_natoms")

    # Load both files once per output mode: natoms-shaped first, then nsel.
    loaded = {}
    for full_output in (True, False):
        system = DeepmdData(self.data_name)
        for key in keys:
            system.add(
                key,
                6,
                atomic=True,
                must=True,
                type_sel=[0],
                output_natoms_for_type_sel=full_output,
            )
        loaded[full_output] = system._load_set(set_dir)
    by_natoms, by_nsel = loaded[True], loaded[False]

    # For a given output mode, the on-disk layout must not change the result.
    np.testing.assert_allclose(by_natoms["tensor_natoms"], by_natoms["tensor_nsel"])
    np.testing.assert_allclose(by_nsel["tensor_natoms"], by_nsel["tensor_nsel"])

    # Atom slot 0 of the natoms-shaped output must equal the nsel-shaped output.
    per_atom = by_natoms["tensor_natoms"].reshape(self.nframes, self.natoms, -1)
    np.testing.assert_allclose(per_atom[:, 0, :], by_nsel["tensor_natoms"])

def _comp_np_mat2(self, first, second):
np.testing.assert_almost_equal(first, second, places)

Expand Down