Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

285 enhancement csv upload admin portal #287

Closed
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions apps/greencheck/fixtures/test_dataset_csv.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
31.15.8.0/21
45.81.42.0/23
81.95.96.0/20
64 changes: 64 additions & 0 deletions apps/greencheck/importers/importer_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import ipaddress
import logging
import pathlib
import re

import ipdb
import pandas as pd
import requests
from django.conf import settings

from apps.greencheck.importers.importer_interface import BaseImporter, Importer

logger = logging.getLogger(__name__)


class CsvImporter(BaseImporter):
# def __init__(cls):
# cls.hosting_provider_id = settings.Csv_PROVIDER_ID

def fetch_data_from_source(cls):
    """
    Load the CSV with addresses and hand it to parse_to_list.

    For now the data comes from the bundled fixture file. The file is
    located relative to this module, so the lookup does not depend on the
    directory the process was started from.

    Return: the result of parse_to_list for the loaded CSV
    """
    # TODO: fetch data from website dropping
    # For now: expect that a csv is saved somewhere and fetch in the following way:
    fixture_path = (
        pathlib.Path(__file__).resolve().parent.parent
        / "fixtures"
        / "test_dataset_csv.csv"
    )
    # header=None: the file has no header row, every line is data
    raw_data = pd.read_csv(fixture_path, header=None)
    return cls.parse_to_list(raw_data)

def parse_to_list(cls, raw_data):
    """
    Validate the loaded CSV data and flatten it into a list of addresses.

    raw_data: table as produced by pandas.read_csv(..., header=None)
    Return: list with the values of the first column when the data has
        exactly one column; otherwise an empty list. An empty list is also
        returned when an unexpected error occurred (the error is logged).
    """
    try:
        cls.validate_csv_file(raw_data)

        if len(raw_data.columns) == 1:
            # ASN or network (ip with subnet)
            return list(raw_data.iloc[:, 0])

        # TODO: Implement support for two columns (start and ending ip of
        # a range).
        # NOTE(review): it is still open whether such a pair should be
        # converted to an ip network. The database stores ip ranges, which
        # are not necessarily the same thing as a network, while the saving
        # interface currently works with ip networks.
        return []
    except Exception:
        logger.exception("Something really unexpected happened. Aborting")
        # Keep the return type stable for callers (e.g. len(result))
        # instead of implicitly returning None.
        return []

def validate_csv_file(cls, data):
    """
    Check that the CSV data has one or two columns and validate each of them.

    Every column is passed as a plain list to validate_column_in_csv_file.
    When the number of columns is anything other than one or two, an error
    is logged and no column is validated.

    data: table as produced by pandas.read_csv(..., header=None)
    """
    column_count = len(data.columns)

    if column_count not in (1, 2):
        # No exception is being handled here, so log a plain error rather
        # than logger.exception (which would attach an empty traceback).
        logger.error("Number of columns in CSV are not as expected")
        return

    for position in range(column_count):
        cls.validate_column_in_csv_file(data.iloc[:, position].values.tolist())

def validate_column_in_csv_file(cls, column):
    """
    Log an error for every value that is neither an AS number nor a network.

    A value is accepted when it is "AS" followed by digits, or when
    ipaddress.ip_network can parse it (IPv4 or IPv6). Anything else is
    logged as being in an incorrect format; nothing is raised.

    column: iterable with the values of one CSV column
    """
    for address in column:
        # Non-string cells are compared by their text form
        candidate = str(address)

        # The whole value has to be an AS number, not merely end in one
        if re.fullmatch(r"AS[0-9]+", candidate):
            continue

        try:
            ipaddress.ip_network(candidate)
        except ValueError:
            logger.exception("Value of %s is in an incorrect format", address)
83 changes: 83 additions & 0 deletions apps/greencheck/tests/test_importer_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import pytest
import pathlib
import re
import pandas as pd
roald-teunissen marked this conversation as resolved.
Show resolved Hide resolved
import ipdb
from io import StringIO

from django.core.management import call_command
from apps.greencheck.importers.importer_csv import CsvImporter

from django.conf import settings


@pytest.fixture
def sample_data_raw():
    """
    Load the locally stored sample dataset that is used by these tests
    Return: the CSV contents as read by pandas, without a header row
    """
    fixtures_dir = pathlib.Path(__file__).parent.parent / "fixtures"
    return pd.read_csv(fixtures_dir / "test_dataset_csv.csv", header=None)


@pytest.fixture
def sample_data_as_list(sample_data_raw):
    """
    Parse the locally stored sample dataset with the CsvImporter
    Return: List

    TODO: express the return value with python type hints once it is clear
    which data structure suits this function best (requested in review).
    """
    return CsvImporter().parse_to_list(sample_data_raw)


@pytest.mark.django_db
class TestCsvImporter:
    def test_parse_to_list(self, sample_data_raw):
        """
        Parsing the sample dataset should yield at least one address.
        """
        # Parse the sample data with a fresh Csv importer
        parsed_addresses = CsvImporter().parse_to_list(sample_data_raw)

        # Test: resulting list contains items
        assert len(parsed_addresses) > 0

    # TODO: Implement test_validate_csv_file

    # TODO: Implement test_validate_column_in_csv_file


@pytest.mark.django_db
class TestCsvImportCommand:
    """
    Intended to test that we have a management command that can run.
    For now only the patch of the data source is set up; the call to the
    command itself is still commented out below.
    """

    def test_handle(self, mocker, sample_data_as_list):
        # Replace the method that retrieves data from the source, so that
        # it returns the locally stored testing sample instead. This way
        # we avoid unnecessary network requests.
        importer_class = "apps.greencheck.importers.importer_csv.CsvImporter"
        mocker.patch(
            importer_class + ".fetch_data_from_source",
            return_value=sample_data_as_list,
        )

        # TODO: Do we need this call command?
        # call_command("update_networks_in_db_csv")
23 changes: 22 additions & 1 deletion docs/how-to.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,25 @@
# How to use..
## Sphinx

## Tests
## Tests using pytest

pytest reads `pytest.ini` before running any tests.
This file specifies which Django settings module to use (`--ds`), which marked tests to exclude via the marker option (`-m`), and other options.

#### Run all tests
Important: make sure you are outside of any environment (run `deactivate` first).
```
./run-tests.sh
```

#### Run all tests until one fails
```
pipenv run pytest -x
```

## Gitpod environment set up steps
1. Make sure there is a branch available in the Github repository
2. Go to the workspace overview in Gitpod of TGWF
3. Run pre-build
Click on the pre-build option in the workspace overview of the workspace you want to prepare.
4. After this preparation, open the workspace and it's ready to be used
2 changes: 1 addition & 1 deletion pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
; so it can take precedent over any environment variables
; that might point DJANGO_SETTINGS_MODULE to development
; or production, instead of the testings module
addopts = --reuse-db --maxfail=0 -m "not smoke_test and not dramatiq and not flaky" --ds="greenweb.settings.testing"
addopts = --create-db --maxfail=0 -m "not smoke_test and not dramatiq and not flaky" --ds="greenweb.settings.testing"
python_files = tests.py test_*.py *_tests.py
markers =
only: Convenience method, so we can run a focussed test in pytest-watch
Expand Down
2 changes: 1 addition & 1 deletion run-tests.sh
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
#!/usr/bin/env bash
pipenv run pytest
pipenv run pytest