Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pandas simplification v2 #826

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 0 additions & 77 deletions gmso/core/topology.py
Original file line number Diff line number Diff line change
Expand Up @@ -1237,83 +1237,6 @@ def write_forcefield(self, filename, overwrite=False):
ff = self.get_forcefield()
ff.to_xml(filename=filename, overwrite=overwrite)

def to_dataframe(self, parameter="sites", site_attrs=None, unyts_bool=True):
    """Return a pandas dataframe describing one kind of member of a topology.

    Parameters
    ----------
    parameter : str, default='sites'
        A string determining what aspects of the gmso topology will be reported.
        Options are: 'sites', 'bonds', 'angles', 'dihedrals', and 'impropers'.
    site_attrs : list of str, default=None
        List of strings that are attributes of the topology site and can be
        included as entries in the pandas dataframe. ``None`` (or any empty
        value) is treated as an empty list.
        Examples of these can be found by printing `topology.sites[0].__dict__`.
        See https://gmso.mosdef.org/en/stable/data_structures.html#gmso.Atom
        for additional information on labeling.
    unyts_bool : bool, default=True
        Determine if numerical values are saved as unyt quantities or floats.
        See https://unyt.readthedocs.io/en/stable/usage.html
        for more information about manipulating unyt quantities.

    Returns
    -------
    pandas.DataFrame
        A pandas.DataFrame object, see
        https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html
        for further information.

    Raises
    ------
    GMSOError
        If the topology is not typed, or if `parameter` names a connection
        type ('bonds', 'angles', 'dihedrals', 'impropers') of which the
        topology has no members.
    AttributeError
        If `parameter` is not one of the supported options.

    Examples
    --------
    >>> topology.to_dataframe(parameter = 'sites', site_attrs = ['charge'])
    This will return a dataframe with a listing of the sites and include the
    charges that correspond to each site.
    >>> topology.to_dataframe(parameter = 'dihedrals', site_attrs = ['positions'])
    This will return a dataframe with a listing of the sites that make up each
    dihedral, the positions of each of those sites, and the parameters that
    are associated with the dihedrals.

    Notes
    -----
    A dataframe is easily manipulated. In order to change the rounding to two
    decimals places for a column named `label`:
    >>> df['label'] = df['label'].round(2)
    The column labels can also be easily modified. This line can take a
    dataframe `df` and rename a column labeled `Atom0` to `newname` using a
    dictionary.
    >>> df.rename(columns = {'Atom0':'newname'})
    See https://pandas.pydata.org/pandas-docs/stable/getting_started/intro_tutorials/index.html
    for further information.
    """
    # Imported lazily through the project's helper so pandas is only
    # required when this method is actually called.
    from gmso.utils.io import import_

    pd = import_("pandas")
    if not site_attrs:
        site_attrs = []

    # The typed check comes before any validation of `parameter`, so an
    # untyped topology always reports GMSOError first.
    if not self.is_typed():
        raise GMSOError(
            "This topology is not typed, please type this object before converting to a pandas dataframe"
        )

    df = pd.DataFrame()
    if parameter == "sites":
        df["atom_types"] = [site.atom_type.name for site in self.sites]
        df["names"] = [site.name for site in self.sites]
        for attr in site_attrs:
            df = self._parse_dataframe_attrs(df, attr, parameter, unyts_bool)
    elif parameter in ["bonds", "angles", "dihedrals", "impropers"]:
        if len(getattr(self, parameter)) == 0:
            # Message text (including its spelling) is kept verbatim in case
            # callers match on it.
            raise GMSOError(
                f"There arent any {parameter} in the topology. The dataframe would be empty."
            )
        df = self._pandas_from_parameters(
            df,
            parameter=parameter,
            site_attrs=site_attrs,
            unyts_bool=unyts_bool,
        )
        df = self._parse_parameter_expression(df, parameter, unyts_bool)
    else:
        # Adjacent literals are joined at compile time, so the message does
        # not pick up source indentation the way a backslash continuation
        # inside a single literal would.
        raise AttributeError(
            "{} is not yet supported for outputting parameters to a dataframe. "
            "Please use one of 'sites', 'bonds', 'angles', 'dihedrals', or "
            "'impropers'".format(str(parameter))
        )

    return df

def get_forcefield(self):
"""Get an instance of gmso.ForceField out of this topology

Expand Down
1 change: 1 addition & 0 deletions gmso/external/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# ruff: noqa: F401
"""Support for various in-memory representations of chemical systems."""

from .convert_dataframe import to_dataframeDict
from .convert_hoomd import (
to_gsd_snapshot,
to_hoomd_forcefield,
Expand Down
Loading
Loading