Skip to content

Commit

Permalink
feat: add basic support for ranges
Browse files Browse the repository at this point in the history
  • Loading branch information
lyschoening committed Mar 31, 2016
1 parent 6f49d74 commit 1942db4
Show file tree
Hide file tree
Showing 7 changed files with 95 additions and 18 deletions.
6 changes: 3 additions & 3 deletions genotype.enbf
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@ VARIANT_DEFINITION = @:IDENTIFIER {("," | ";") [sep] @:IDENTIFIER}*;

BINARY_VARIANT = "+" | "-";

RANGE = "[" [type:RANGE_SEQUENCE_TYPE] start:INTEGER "_" end:INTEGER "]"
| "[" [type:RANGE_SEQUENCE_TYPE] pos:INTEGER "]";
RANGE = "[" [level:RANGE_SEQUENCE_LEVEL] start:INTEGER "_" end:INTEGER "]"
| "[" [level:RANGE_SEQUENCE_LEVEL] pos:INTEGER "]";

RANGE_SEQUENCE_TYPE = ("c" | "p") ".";
RANGE_SEQUENCE_LEVEL = @:("c" | "p") ".";

(* NOTE ACCESSION with its optional ":" can be ambiguous when it is used within a fusion and the IDENTIFIER is not numeric.
In these cases, a DATABASE should be specified. *)
Expand Down
17 changes: 9 additions & 8 deletions gnomic/grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from grako.util import re, RE_FLAGS


__version__ = (2016, 1, 19, 8, 48, 30, 1)
__version__ = (2016, 3, 31, 14, 28, 44, 3)

__all__ = [
'GnomicParser',
Expand Down Expand Up @@ -373,8 +373,8 @@ def _RANGE_(self):
with self._option():
self._token('[')
with self._optional():
self._RANGE_SEQUENCE_TYPE_()
self.ast['type'] = self.last_node
self._RANGE_SEQUENCE_LEVEL_()
self.ast['level'] = self.last_node
self._INTEGER_()
self.ast['start'] = self.last_node
self._token('_')
Expand All @@ -384,27 +384,28 @@ def _RANGE_(self):
with self._option():
self._token('[')
with self._optional():
self._RANGE_SEQUENCE_TYPE_()
self.ast['type'] = self.last_node
self._RANGE_SEQUENCE_LEVEL_()
self.ast['level'] = self.last_node
self._INTEGER_()
self.ast['pos'] = self.last_node
self._token(']')
self._error('no available options')

self.ast._define(
['type', 'start', 'end', 'pos'],
['level', 'start', 'end', 'pos'],
[]
)

@graken()
def _RANGE_SEQUENCE_TYPE_(self):
def _RANGE_SEQUENCE_LEVEL_(self):
with self._group():
with self._choice():
with self._option():
self._token('c')
with self._option():
self._token('p')
self._error('expecting one of: c p')
self.ast['@'] = self.last_node
self._token('.')

@graken()
Expand Down Expand Up @@ -518,7 +519,7 @@ def BINARY_VARIANT(self, ast):
def RANGE(self, ast):
return ast

def RANGE_SEQUENCE_TYPE(self, ast):
def RANGE_SEQUENCE_LEVEL(self, ast):
return ast

def ACCESSION(self, ast):
Expand Down
23 changes: 23 additions & 0 deletions gnomic/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,29 @@ def __repr__(self):
for key, value in self.__dict__.items() if value))


class Range(object):
    """
    An inclusive range at a coding (DNA), RNA or protein level.

    Two ranges are equal when their level, start and end are all equal, and
    equal ranges hash identically, so ranges behave as values in sets and
    as dictionary keys.

    :param level: sequence level the positions refer to,
        e.g. ``'coding'``, ``'RNA'`` or ``'protein'``
    :param start: first position of the range (inclusive)
    :param end: last position of the range (inclusive)
    """
    def __init__(self, level, start, end):
        self.level = level
        self.start = start
        self.end = end

    def _key(self):
        # Single source of truth for equality and hashing so the two can
        # never disagree.
        return self.level, self.start, self.end

    def __eq__(self, other):
        if not isinstance(other, Range):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        # Python 2 does not derive ``!=`` from ``__eq__``; spell it out.
        equal = self.__eq__(other)
        return equal if equal is NotImplemented else not equal

    def __hash__(self):
        # Hashing the tuple keeps (start, end) order significant, unlike a
        # plain sum of the individual hashes.
        return hash(self._key())

    def __len__(self):
        # Both ends are inclusive, hence the + 1.
        return self.end - self.start + 1

    def __repr__(self):
        # A single-position range is shown without the redundant end.
        if self.start == self.end:
            return '{}({!r}, {})'.format(self.__class__.__name__, self.level, self.start)
        return '{}({!r}, {}, {})'.format(self.__class__.__name__, self.level, self.start, self.end)


class Organism(object):
def __init__(self, name, aliases=None):
self.name = name
Expand Down
16 changes: 15 additions & 1 deletion gnomic/semantics.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from gnomic.models import Mutation, Fusion, Plasmid, Feature, Organism, Accession, Type, FeatureTree
from gnomic.models import Mutation, Fusion, Plasmid, Feature, Organism, Accession, Type, FeatureTree, Range
from gnomic.grammar import GnomicSemantics


Expand Down Expand Up @@ -37,6 +37,20 @@ def replacement(self, ast):
def deletion(self, ast):
    """Return a Mutation built from ``ast.old``, ``None`` and ``ast.marker``."""
    removed = ast.old
    marker = ast.marker
    # None is passed in the second position: a deletion has nothing to put
    # in place of the old value.
    return Mutation(removed, None, marker=marker)

def RANGE(self, ast):
    """
    Turn a parsed RANGE node into a :class:`Range`.

    :param ast: node exposing ``level``, ``pos``, ``start`` and ``end``
    :return: ``Range(level, pos, pos)`` when a single position was given,
        otherwise ``Range(level, start, end)``
    :raises KeyError: if a level prefix is present but not one of
        ``c``, ``r`` or ``p``
    """
    level_names = {
        'c': 'coding',
        'r': 'RNA',
        'p': 'protein'
    }

    # The level prefix is optional in the grammar, so it may be absent; in
    # that case the level is left as None instead of failing the lookup.
    level = None if ast.level is None else level_names[ast.level]

    # Compare against None explicitly so that a position of 0 is still
    # treated as a single-position range.
    if ast.pos is not None:
        return Range(level, ast.pos, ast.pos)
    return Range(level, ast.start, ast.end)

def INTEGER(self, ast):
    """Convert the parsed INTEGER node to a Python ``int``."""
    number = int(ast)
    return number

def ACCESSION(self, ast):
    """Return an Accession built from the node's ``id`` and ``db`` entries."""
    identifier = ast['id']
    database = ast['db']
    return Accession(identifier, database)

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setup(
name='gnomic',
version='0.1.1',
version='0.2.0',
packages=find_packages(exclude=['*tests*']),
license='Apache',
author='Lars Schöning',
Expand Down
36 changes: 35 additions & 1 deletion tests/test_genotype.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from unittest import TestCase, SkipTest

from gnomic import Genotype, Feature, Ins, Del, Fusion, Sub, Type
from gnomic import Genotype, Feature, Ins, Del, Fusion, Sub, Type, Range


class BaseTestCase(TestCase):
Expand Down Expand Up @@ -118,6 +118,40 @@ def test_no_delete_if_present(self):
Del(Feature(name='geneA')),
}, self.chain('+geneA(x)', '-geneA').changes())


class GenotypeRangeTestCase(BaseTestCase):
    """Checks how deletions restricted to a range show up in ``changes()``."""

    def test_delete_range_basic(self):
        # A start_end range at the coding level.
        self.assertEqual({
            Del(Feature(name='geneA', range=Range('coding', 5, 10))),
        }, self.chain('-geneA[c.5_10]').changes())

        # A single position is expected as a range whose start equals its end.
        self.assertEqual({
            Del(Feature(name='geneA', range=Range('protein', 5, 5))),
        }, self.chain('-geneA[p.5]').changes())

    def test_delete_insert(self):
        # Re-inserting the whole feature supersedes the earlier partial deletion.
        self.assertEqual({
            Ins(Feature(name='geneA')),
        }, self.chain('-geneA[c.5_10]', '+geneA').changes())

    def test_delete_multiple_ranges(self):
        # Skip from inside the test: using the SkipTest exception class as a
        # decorator replaces the method with a non-callable object, so the
        # test would be dropped silently instead of being reported as skipped.
        self.skipTest('handling of multiple deleted ranges is not final')

        # TODO in the current implementation, only the most recently deleted range is accounted for.
        # TODO this implementation may change

        self.assertEqual({
            # Del(Feature(name='geneA', range=Range('coding', 5, 10))),
            Del(Feature(name='geneA', range=Range('coding', 11, 12))),
        }, self.chain('-geneA[c.5_10]', '-geneA[c.11_12]').changes())

        self.assertEqual({
            Del(Feature(name='geneA'))
        }, self.chain('-geneA[c.5_10]', '-geneA').changes())


# TODO detailed tracking of different bits & pieces of features.

class GenotypeFusionsTestCase(BaseTestCase):

@SkipTest
Expand Down
13 changes: 9 additions & 4 deletions tests/test_grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,22 @@ def test_parse_simple_insertions(self):
], parse('+fooF'))

self.assertEqual([
Ins(Feature(name='fooF', accession=Accession(identifier='123', database='FOO')))
Ins(Feature(name='fooF', accession=Accession(identifier=123, database='FOO')))
], parse('+fooF#FOO:123'))

self.assertEqual([
Ins(Feature(accession=Accession(identifier='123', database='FOO')))
Ins(Feature(accession=Accession(identifier=123, database='FOO')))
], parse('+#FOO:123'))

self.assertEqual([
Ins(Feature(accession=Accession(identifier='123')))
Ins(Feature(accession=Accession(identifier='BAR', database='FOO')))
], parse('+#FOO:BAR'))

self.assertEqual([
Ins(Feature(accession=Accession(identifier=123)))
], parse('+#123'))


def test_parse_variants(self):
self.assertEqual([
Feature(type=Type('phene'), name='A', variant='wild-type')
Expand Down Expand Up @@ -69,7 +74,7 @@ def test_parse_variants(self):

self.assertEqual([
Feature(type=Type('phene'),
accession=Accession(identifier='123', database='FOO'),
accession=Accession(identifier=123, database='FOO'),
variant='wild-type')
], parse('#FOO:123+'))

Expand Down

0 comments on commit 1942db4

Please sign in to comment.