Skip tests using AWS credentials when they are not provided (#149)
* Skip some tests using AWS credentials when they are not specified

* Unify CIs and rename

* Add pull_request
himkt authored Jun 23, 2021
1 parent 6c8cbb0 commit 3870227
Showing 4 changed files with 70 additions and 7 deletions.
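Every test added in the diffs below repeats the same inline guard: skip when the parametrized tokenizer points at an S3-hosted dictionary but no AWS access key is exported. As a reference, here is a minimal standalone sketch of that pattern; requires_s3_credentials is a hypothetical helper name that does not appear in this commit (the committed tests inline the check instead).

import os

import pytest


def requires_s3_credentials(tokenizer_params: dict) -> None:
    # Hypothetical helper sketch: skip the calling test when it depends on an
    # s3:// resource but AWS_ACCESS_KEY_ID is not set in the environment.
    uses_s3 = tokenizer_params.get("system_dictionary_path", "").startswith("s3://")
    if uses_s3 and "AWS_ACCESS_KEY_ID" not in os.environ:
        pytest.skip("AWS credentials not found.")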
@@ -1,4 +1,4 @@
name: Python package
name: PR CI

on:
  push:
21 changes: 18 additions & 3 deletions tests/api/v1/test_batch_tokenization.py
@@ -1,19 +1,18 @@
import os
from typing import Dict
from fastapi.testclient import TestClient

import pytest
from fastapi.testclient import TestClient

from konoha.api.server import create_app


app = create_app()
client = TestClient(app)


@pytest.mark.parametrize(
"tokenizer_params", [
{"tokenizer": "mecab"},
{"tokenizer": "mecab", "system_dictionary_path": "s3://konoha-demo/mecab/ipadic"},
{"tokenizer": "sudachi", "mode": "A"},
{"tokenizer": "sudachi", "mode": "B"},
{"tokenizer": "sudachi", "mode": "C"},
@@ -30,3 +29,19 @@ def test_tokenization(tokenizer_params: Dict):
    response = client.post("/api/v1/batch_tokenize", headers=headers, json=params)
    assert response.status_code == 200
    assert "tokens_list" in response.json()


@pytest.mark.parametrize(
"tokenizer_params", [
{"tokenizer": "mecab", "system_dictionary_path": "s3://konoha-demo/mecab/ipadic"},
]
)
def test_tokenization_with_remote_resource(tokenizer_params: Dict):
if "AWS_ACCESS_KEY_ID" not in os.environ and tokenizer_params["system_dictionary_path"].startswith("s3://"):
pytest.skip("AWS credentials not found.")

headers = {"Content-Type": "application/json"}
params = dict(tokenizer_params, texts=["私は猫", "あなたは犬"])
response = client.post("/api/v1/batch_tokenize", headers=headers, json=params)
assert response.status_code == 200
assert "tokens_list" in response.json()
21 changes: 18 additions & 3 deletions tests/api/v1/test_tokenization.py
@@ -1,11 +1,11 @@
import os
from typing import Dict
from fastapi.testclient import TestClient

import pytest
from fastapi.testclient import TestClient

from konoha.api.server import create_app


app = create_app()
client = TestClient(app)

@@ -14,7 +14,6 @@
"tokenizer_params", [
{"tokenizer": "mecab"},
{"tokenizer": "mecab", "with_postag": True},
{"tokenizer": "mecab", "system_dictionary_path": "s3://konoha-demo/mecab/ipadic"},
{"tokenizer": "sudachi", "mode": "A"},
{"tokenizer": "sudachi", "mode": "B"},
{"tokenizer": "sudachi", "mode": "C"},
@@ -31,3 +30,19 @@ def test_tokenization(tokenizer_params: Dict):
    response = client.post("/api/v1/tokenize", headers=headers, json=params)
    assert response.status_code == 200
    assert "tokens" in response.json()


@pytest.mark.parametrize(
"tokenizer_params", [
{"tokenizer": "mecab", "system_dictionary_path": "s3://konoha-demo/mecab/ipadic"},
]
)
def test_tokenization_with_remote_resoruce(tokenizer_params: Dict):
if "AWS_ACCESS_KEY_ID" not in os.environ and tokenizer_params["system_dictionary_path"].startswith("s3://"):
pytest.skip("AWS credentials not found.")

headers = {"Content-Type": "application/json"}
params = dict(tokenizer_params, text="私は猫")
response = client.post("/api/v1/tokenize", headers=headers, json=params)
assert response.status_code == 200
assert "tokens" in response.json()
33 changes: 33 additions & 0 deletions tests/test_word_tokenizer.py
@@ -1,4 +1,5 @@
import json
import os
from typing import Dict
from typing import List

@@ -44,6 +45,19 @@ def test_tokenize_with_character(raw_texts: List[str], tokenizer_params: Dict):
    assert expect == result


@pytest.mark.parametrize(
"tokenizer_params", [
{"tokenizer": "mecab", "system_dictionary_path": "s3://konoha-demo/mecab/ipadic"},
]
)
def test_tokenize(raw_texts: List[str], tokenizer_params: Dict):
tokenizer_name = tokenizer_params["tokenizer"]
tokenizer = WordTokenizer(**tokenizer_params)
expect = [Token.from_dict(token_param) for token_param in read_lines(tokenizer_name)[0]]
result = tokenizer.tokenize(raw_texts[0])
assert expect == result


@pytest.mark.parametrize(
"tokenizer_params", [
{"tokenizer": "mecab"},
@@ -66,3 +80,22 @@ def test_batch_tokenize_with_character(raw_texts: List[str], tokenizer_params: Dict):
    ]
    result = tokenizer.batch_tokenize(raw_texts)
    assert expect == result


@pytest.mark.parametrize(
"tokenizer_params", [
{"tokenizer": "mecab", "system_dictionary_path": "s3://konoha-demo/mecab/ipadic"},
]
)
def test_batch_tokenize(raw_texts: List[str], tokenizer_params: Dict):
if "AWS_ACCESS_KEY_ID" not in os.environ and tokenizer_params["system_dictionary_path"].startswith("s3://"):
pytest.skip("AWS credentials not found.")

tokenizer_name = tokenizer_params["tokenizer"]
tokenizer = WordTokenizer(**tokenizer_params)
expect = [
[Token.from_dict(token_param) for token_param in token_params]
for token_params in read_lines(tokenizer_name)
]
result = tokenizer.batch_tokenize(raw_texts)
assert expect == result
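To reproduce the S3-backed case locally outside pytest, the tokenizer can be constructed directly. A minimal sketch, assuming AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY are exported and the konoha-demo bucket path used in the tests is reachable:

import os

from konoha import WordTokenizer

# The tests above skip in this situation; here we simply fail fast instead.
assert "AWS_ACCESS_KEY_ID" in os.environ, "export AWS credentials first"

tokenizer = WordTokenizer(
    tokenizer="mecab",
    system_dictionary_path="s3://konoha-demo/mecab/ipadic",
)
print(tokenizer.tokenize("私は猫"))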
