Commit

Merge branch 'release/v0.4.2'
speedcell4 committed Mar 3, 2022
2 parents a033a95 + 1508825 commit 630afcf
Showing 18 changed files with 747 additions and 407 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-publish.yml
@@ -17,7 +17,7 @@ jobs:
    - name: Set up Python
      uses: actions/setup-python@v2
      with:
        python-version: '3.7'
        python-version: '3.8'
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
4 changes: 2 additions & 2 deletions .github/workflows/unit-tests.yml
@@ -9,10 +9,10 @@ jobs:

    steps:
    - uses: actions/checkout@v2
    - name: Set up Python 3.7
    - name: Set up Python
      uses: actions/setup-python@v2
      with:
        python-version: 3.7
        python-version: '3.8'
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
11 changes: 9 additions & 2 deletions README.md
@@ -6,13 +6,20 @@

## Requirements

- Python 3.7
- PyTorch 1.6.0
- Python 3.8
- PyTorch 1.10.2

## Installation

`python3 -m pip install torchlatent`

## Performance

```
TorchLatent (0.109244) => 0.003781 0.017763 0.087700 0.063497
Third (0.232487) => 0.103277 0.129209 0.145311
```
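
Read against `benchmark/crf.py` below, the bracketed figure appears to be the summed average seconds per run for index compilation, forward, and backward, and the remaining columns the individual phase averages, with decoding last; lower is better.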

## Usage

```python
...
```
Empty file added benchmark/__init__.py
Empty file.
9 changes: 9 additions & 0 deletions benchmark/__main__.py
@@ -0,0 +1,9 @@
from aku import Aku

from benchmark.crf import benchmark_crf

aku = Aku()

aku.option(benchmark_crf)

aku.run()
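
Since `aku` builds the command-line interface from the signature of the registered function, running `python -m benchmark` presumably launches `benchmark_crf` with the defaults shown below, with its keyword arguments (`num_tags`, `batch_size`, and so on) exposed as optional flags; the exact flag spelling follows Aku's own conventions.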
62 changes: 62 additions & 0 deletions benchmark/crf.py
@@ -0,0 +1,62 @@
import torch
from torchrua import pack_sequence
from tqdm import tqdm

from benchmark.meter import TimeMeter
from tests.third_party import ThirdPartyCrfDecoder
from torchlatent.crf import CrfDecoder


def benchmark_crf(num_tags: int = 50, num_conjugates: int = 1, num_runs: int = 100,
                  batch_size: int = 32, max_token_size: int = 512):
    j1, f1, b1, d1, = TimeMeter(), TimeMeter(), TimeMeter(), TimeMeter()
    j2, f2, b2, d2, = TimeMeter(), TimeMeter(), TimeMeter(), TimeMeter()

    if torch.cuda.is_available():
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')
    print(f'device => {device}')

    decoder = CrfDecoder(num_tags=num_tags, num_conjugates=num_conjugates).to(device=device)
    print(f'decoder => {decoder}')

    third_decoder = ThirdPartyCrfDecoder(num_tags=num_tags, num_conjugates=num_conjugates).to(device=device)
    print(f'third_decoder => {third_decoder}')

    for _ in tqdm(range(num_runs)):
        token_sizes = torch.randint(1, max_token_size + 1, (batch_size,), device=device).detach().cpu().tolist()

        emissions = pack_sequence([
            torch.randn((token_size, num_conjugates, num_tags), device=device, requires_grad=True)
            for token_size in token_sizes
        ])

        tags = pack_sequence([
            torch.randint(0, num_tags, (token_size, num_conjugates), device=device)
            for token_size in token_sizes
        ])

        with j1:
            indices = decoder.compile_indices(emissions=emissions, tags=tags)

        with f1:
            loss = decoder.fit(emissions=emissions, tags=tags, indices=indices).neg().mean()

        with b1:
            _, = torch.autograd.grad(loss, emissions.data, torch.ones_like(loss))

        with d1:
            _ = decoder.decode(emissions=emissions, indices=indices)

        with f2:
            loss = third_decoder.fit(emissions=emissions, tags=tags).neg().mean()

        with b2:
            _, = torch.autograd.grad(loss, emissions.data, torch.ones_like(loss))

        with d2:
            _ = third_decoder.decode(emissions=emissions)

    print(f'TorchLatent ({j1.merit + f1.merit + b1.merit:.6f}) => {j1} {f1} {b1} {d1}')
    print(f'Third ({j2.merit + f2.merit + b2.merit:.6f}) => {j2} {f2} {b2} {d2}')
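
For orientation, a minimal, untimed sketch distilled from the benchmark above: it exercises the same `CrfDecoder` calls (`compile_indices`, `fit`, `decode`) on tiny CPU tensors; the concrete sizes are arbitrary and the return format of `decode` is left unspecified here.

```python
import torch
from torchrua import pack_sequence

from torchlatent.crf import CrfDecoder

num_tags, num_conjugates = 5, 1
token_sizes = [3, 4, 2]  # arbitrary sequence lengths

decoder = CrfDecoder(num_tags=num_tags, num_conjugates=num_conjugates)

# packed emissions of shape (token_size, num_conjugates, num_tags) per sequence
emissions = pack_sequence([
    torch.randn((token_size, num_conjugates, num_tags), requires_grad=True)
    for token_size in token_sizes
])
# packed gold tags of shape (token_size, num_conjugates) per sequence
tags = pack_sequence([
    torch.randint(0, num_tags, (token_size, num_conjugates))
    for token_size in token_sizes
])

indices = decoder.compile_indices(emissions=emissions, tags=tags)
loss = decoder.fit(emissions=emissions, tags=tags, indices=indices).neg().mean()
loss.backward()

predictions = decoder.decode(emissions=emissions, indices=indices)
```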
23 changes: 23 additions & 0 deletions benchmark/meter.py
@@ -0,0 +1,23 @@
from datetime import datetime


class TimeMeter(object):
    def __init__(self) -> None:
        super(TimeMeter, self).__init__()

        self.seconds = 0
        self.counts = 0

    def __enter__(self):
        self.start_tm = datetime.now()

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.seconds += (datetime.now() - self.start_tm).total_seconds()
        self.counts += 1

    @property
    def merit(self) -> float:
        return self.seconds / max(1, self.counts)

    def __repr__(self) -> str:
        return f'{self.merit :.6f}'
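
A quick usage sketch of the meter above (the timed body is a stand-in): each `with` block accumulates one run, and `merit` reports the average seconds per run.

```python
from benchmark.meter import TimeMeter

meter = TimeMeter()
for _ in range(3):
    with meter:
        sum(range(1_000_000))  # stand-in for the work being timed

print(meter)        # formatted average, e.g. '0.012345'
print(meter.merit)  # the same average as a float
```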
6 changes: 3 additions & 3 deletions setup.py
@@ -4,17 +4,17 @@

setup(
    name=name,
    version='0.4.1',
    version='0.4.2',
    packages=[package for package in find_packages() if package.startswith(name)],
    url='https://github.com/speedcell4/torchlatent',
    license='MIT',
    author='speedcell4',
    author_email='[email protected]',
    description='High Performance Structured Prediction in PyTorch',
    python_requires='>=3.7',
    python_requires='>=3.8',
    install_requires=[
        'numpy',
        'torchrua>=0.3.0',
        'torchrua>=0.4.0',
    ],
    extras_require={
        'dev': [
57 changes: 14 additions & 43 deletions tests/strategies.py
@@ -2,20 +2,14 @@

from hypothesis import strategies as st

if torch.cuda.is_available():
    MAX_BATCH_SIZE = 120
    MAX_TOKEN_SIZE = 512
    MAX_NUM_TAGS = 100
    MAX_NUM_CONJUGATES = 16
else:
    MAX_BATCH_SIZE = 12
    MAX_TOKEN_SIZE = 24
    MAX_NUM_TAGS = 12
    MAX_NUM_CONJUGATES = 6

TINY_BATCH_SIZE = 6
TINY_TOKEN_SIZE = 12

BATCH_SIZE = 24
TOKEN_SIZE = 50
NUM_TAGS = 8
NUM_CONJUGATES = 5


@st.composite
def devices(draw):
@@ -28,36 +22,13 @@ def devices(draw):


@st.composite
def batch_sizes(draw, max_value: int = MAX_BATCH_SIZE):
    return draw(st.integers(min_value=1, max_value=max_value))


@st.composite
def batch_size_lists(draw, max_batch_size: int = MAX_BATCH_SIZE):
    return [
        draw(batch_sizes(max_value=max_batch_size))
        for _ in range(draw(batch_sizes(max_value=max_batch_size)))
    ]


@st.composite
def token_sizes(draw, max_value: int = MAX_TOKEN_SIZE):
    return draw(st.integers(min_value=1, max_value=max_value))


@st.composite
def token_size_lists(draw, max_token_size: int = MAX_TOKEN_SIZE, max_batch_size: int = MAX_BATCH_SIZE):
    return [
        draw(token_sizes(max_value=max_token_size))
        for _ in range(draw(batch_sizes(max_value=max_batch_size)))
    ]


@st.composite
def tag_sizes(draw, max_value: int = MAX_NUM_TAGS):
    return draw(st.integers(min_value=1, max_value=max_value))


@st.composite
def conjugate_sizes(draw, max_value: int = MAX_NUM_CONJUGATES):
    return draw(st.integers(min_value=1, max_value=max_value))


@st.composite
def sizes(draw, *size: int, min_size: int = 1):
    max_size, *size = size

    if len(size) == 0:
        return draw(st.integers(min_value=min_size, max_value=max_size))
    else:
        return [
            draw(sizes(*size, min_size=min_size))
            for _ in range(draw(st.integers(min_value=min_size, max_value=max_size)))
        ]
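
A sketch of how the consolidated `sizes` strategy composes with the constants above; the test function itself is illustrative. `sizes(BATCH_SIZE, TOKEN_SIZE)` draws a list of up to `BATCH_SIZE` lengths, each in `[1, TOKEN_SIZE]`, while `sizes(NUM_TAGS)` draws a single integer in `[1, NUM_TAGS]`.

```python
from hypothesis import given

from tests.strategies import BATCH_SIZE, NUM_TAGS, TOKEN_SIZE, sizes


@given(
    token_sizes=sizes(BATCH_SIZE, TOKEN_SIZE),  # list of 1..BATCH_SIZE lengths in [1, TOKEN_SIZE]
    num_tags=sizes(NUM_TAGS),                   # single integer in [1, NUM_TAGS]
)
def test_sizes_strategy(token_sizes, num_tags):
    assert 1 <= len(token_sizes) <= BATCH_SIZE
    assert all(1 <= size <= TOKEN_SIZE for size in token_sizes)
    assert 1 <= num_tags <= NUM_TAGS
```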