Skip to content

Commit

Permalink
Merge pull request #1 from Noble-Lab/bug-v0.0.1
Browse files Browse the repository at this point in the history
Fixed read file bug
  • Loading branch information
donnyyy777 authored Dec 9, 2020
2 parents 5525da2 + ae0b71a commit 1491f3a
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 13 deletions.
Binary file modified crema/__pycache__/parsers.cpython-37.pyc
Binary file not shown.
10 changes: 4 additions & 6 deletions crema/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ def read_file(
spectrum_col="scan",
score_col="combined p-value",
target_col="target/decoy",
delimiter="," or "\t",
):
"""
Read tab-delimited files.
Expand All @@ -27,9 +26,6 @@ def read_file(
name of the column that defines the scores (p-values) of the psms
target_col : str
name of the column that indicates if a psm is a target/decoy
delimiter : str
string character equal to what is used to separate columns
within the tab-delimited file
Returns
-------
Expand All @@ -46,13 +42,15 @@ def read_file(
fields.append(col)
fields.append(score_col)
fields.append(target_col)
# fields = [spectrum_col, score_col, target_col]

# Create empty Pandas dataframe
data = pd.DataFrame()

# Loop through all given files
for file in input_files:
data = data.append(
pd.read_csv(file, sep=delimiter, usecols=fields), ignore_index=True
pd.read_csv(file, sep=None, usecols=fields, engine="python"),
ignore_index=True,
)
data = convert_target_col(data, target_col)
return PsmDataset(data, spectrum_col, score_col, target_col)
Expand Down
11 changes: 11 additions & 0 deletions data/single_basic_tab.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
scan combined p-value target/decoy
1 0.7 TRUE
2 0.4 FALSE
3 0.1 TRUE
4 0.55 TRUE
5 0.25 TRUE
1 0.6 TRUE
2 0.2 FALSE
3 0.7 FALSE
4 0.56 TRUE
5 0.3 FALSE
22 changes: 22 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,28 @@ def test_cli_basic(tmp_path):
assert os.path.isfile(os.path.join(tmp_path, "crema.logfile.log"))


def test_cli_basic_tab(tmp_path):
    """
    Run the crema command line on a tab delimited input file.

    The input file, "data/single_basic_tab.txt", uses the crux default
    column names.

    Parameters
    ----------
    tmp_path : pytest fixture of a temporary directory
        A pytest temporary directory unique to the test invocation

    Returns
    -------
    None
        Asserts that both the results file and the log file exist
        in the requested output directory.
    """
    # check=True makes a non-zero exit status of the cli fail the test.
    subprocess.run(
        ["crema", "data/single_basic_tab.txt", "--output_dir", tmp_path],
        check=True,
    )

    # Same two checks as before, in the same order: results, then log.
    for expected_name in ("crema.psm_results.txt", "crema.logfile.log"):
        assert os.path.isfile(os.path.join(tmp_path, expected_name))


def test_cli_custom_root(tmp_path):
"""
Test that the cli works with custom file root.
Expand Down
55 changes: 48 additions & 7 deletions tests/test_crema_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,12 +341,30 @@ def test_single_basic_dataset_class():
-------
PsmDataset
A :py:class:`~crema.dataset.PsmDataset` object
containing the PSM data from the given tab-delimited file.
containing the PSM data from the given comma separated value file.
"""
psm = read_file(["data/single_basic.csv"])
return psm


@pytest.fixture
def test_single_basic_tab_dataset_class():
    """
    Pytest fixture that builds a PsmDataset object by reading in
    "data/single_basic_tab.txt", for use in subsequent test cases.
    This file has crux default column names.

    Returns
    -------
    PsmDataset
        A :py:class:`~crema.dataset.PsmDataset` object
        containing the PSM data from the given tab delimited file.
    """
    return read_file(["data/single_basic_tab.txt"])


@pytest.fixture
def test_single_int_targets_dataset_class():
"""
Expand All @@ -361,7 +379,7 @@ def test_single_int_targets_dataset_class():
-------
PsmDataset
A :py:class:`~crema.dataset.PsmDataset` object
containing the PSM data from the given tab-delimited file.
containing the PSM data from the given comma separated value file.
"""
psm = read_file(["data/single_int_targets.csv"])
return psm
Expand All @@ -380,7 +398,7 @@ def test_single_text_scan_dataset_class():
-------
PsmDataset
A :py:class:`~crema.dataset.PsmDataset` object
containing the PSM data from the given tab-delimited file.
containing the PSM data from the given comma separated value file.
"""
psm = read_file(["data/single_text_scan.csv"])
return psm
Expand All @@ -399,7 +417,7 @@ def test_single_add_spectrum_dataset_class():
-------
PsmDataset
A :py:class:`~crema.dataset.PsmDataset` object
containing the PSM data from the given tab-delimited file.
containing the PSM data from the given comma separated value file.
"""
psm = read_file(
["data/single_add_spectrum.csv"], spectrum_col=["scan", "extras"]
Expand All @@ -420,7 +438,7 @@ def test_single_arbitrary_dataset_class():
-------
PsmDataset
A :py:class:`~crema.dataset.PsmDataset` object
containing the PSM data from the given tab-delimited file.
containing the PSM data from the given comma separated value file.
"""
psm = read_file(["data/single_arbitrary.csv"])
return psm
Expand All @@ -439,7 +457,7 @@ def test_single_noncrux_dataset_class():
-------
PsmDataset
A :py:class:`~crema.dataset.PsmDataset` object
containing the PSM data from the given tab-delimited file.
containing the PSM data from the given comma separated value file.
"""
psm = read_file(["data/single_noncrux.csv"], "scan", "p-value", "target")
return psm
Expand All @@ -458,7 +476,7 @@ def test_multi_dataset_class():
-------
PsmDataset
A :py:class:`~crema.dataset.PsmDataset` object
containing the PSM data from the given tab-delimited files.
containing the PSM data from the given comma separated value files.
"""
files = ["data/multi_target.csv", "data/multi_decoy.csv"]
psm = read_file(files)
Expand Down Expand Up @@ -511,6 +529,29 @@ def test_single_basic_tdc(test_single_basic_dataset_class):
pd.testing.assert_frame_equal(actual, compare)


def test_single_basic_tab_data(test_single_basic_tab_dataset_class):
    """
    Checks that a PsmDataset object built from a single tab delimited
    file with crux default column names holds the expected data. The
    point of this test is to ensure that crema can read data separated
    by tabs as well as by commas.

    Parameters
    ----------
    test_single_basic_tab_dataset_class : pytest fixture of a PsmDataset object
        A psm object created by reading in the "single_basic_tab.txt" file

    Returns
    -------
    None
        Asserts that the data in the PsmDataset object is equal to
        the dataframe named "testframe_single_basic_data".
    """
    # Compare against a copy of the shared reference frame.
    expected = testframe_single_basic_data.copy()
    observed = test_single_basic_tab_dataset_class.data
    pd.testing.assert_frame_equal(expected, observed)


def test_single_int_targets_data(test_single_int_targets_dataset_class):
"""
Checks whether or not a PsmDataset object is created properly
Expand Down

0 comments on commit 1491f3a

Please sign in to comment.