Skip to content

Commit

Permalink
Set 1-based coordinate as our default (#633)
Browse files Browse the repository at this point in the history
* Set 1-based coordinate as our default

Variant Transforms was only able to generate BigQuery tables in 0-based coordinates. We added the ability to choose between 0-based and 1-based coordinates in #586.
We decided to modify our default behavior to generate 1-based variants in the output tables.

* Update integration tests

Now that 1-based coordinates are our default indexing, we need to update all our `sum_start` queries.

* An almost empty commit

Hoping this will help the Travis webhook work properly.
  • Loading branch information
samanvp authored Jul 7, 2020
1 parent 10a5c4c commit e7dd6da
Show file tree
Hide file tree
Showing 36 changed files with 154 additions and 153 deletions.
9 changes: 5 additions & 4 deletions gcp_variant_transforms/options/variant_transform_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,11 @@ def add_arguments(self, parser):
'performance penalty of an extra pass over all variants.'))
parser.add_argument(
'--use_1_based_coordinate',
type='bool', default=False, nargs='?', const=True,
help=('If true, start position will be 1 based, and end position will '
'be inclusive. Otherwise, by default the records will be stored '
'with 0 based coordinates, with exclusive end position.'))
type='bool', default=True, nargs='?', const=True,
help=('If true, start position will be 1-based, and end position will '
'be inclusive. Otherwise, the records will be stored in 0-based '
'coordinates, with exclusive end position. For more information '
'please refer to www.biostars.org/p/84686/'))

def validate(self, parsed_args):
# type: (argparse.Namespace) -> None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__chr1`"],
"expected_result": {"sum_start": 751907099239378}
"expected_result": {"sum_start": 751907105253770}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__chr1`"],
Expand All @@ -33,7 +33,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__chr2`"],
"expected_result": {"sum_start": 793122305774880}
"expected_result": {"sum_start": 793122312390064}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__chr2`"],
Expand All @@ -45,7 +45,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__chr3`"],
"expected_result": {"sum_start": 538227059824096}
"expected_result": {"sum_start": 538227065351004}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__chr3`"],
Expand All @@ -57,7 +57,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__chr4`"],
"expected_result": {"sum_start": 517336795238304}
"expected_result": {"sum_start": 517336800711834}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__chr4`"],
Expand All @@ -69,7 +69,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__chr5`"],
"expected_result": {"sum_start": 456439287325718}
"expected_result": {"sum_start": 456439292386152}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__chr5`"],
Expand All @@ -81,7 +81,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__chr6`"],
"expected_result": {"sum_start": 412018127892148}
"expected_result": {"sum_start": 412018132740998}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__chr6`"],
Expand All @@ -93,7 +93,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__chr7`"],
"expected_result": {"sum_start": 342272553704102}
"expected_result": {"sum_start": 342272558134564}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__chr7`"],
Expand All @@ -105,7 +105,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__chr8`"],
"expected_result": {"sum_start": 300642747304808}
"expected_result": {"sum_start": 300642751672486}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__chr8`"],
Expand All @@ -117,7 +117,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__chr9`"],
"expected_result": {"sum_start": 231608078280770}
"expected_result": {"sum_start": 231608081585546}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__chr9`"],
Expand All @@ -129,7 +129,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__chr10`"],
"expected_result": {"sum_start": 253789500252972}
"expected_result": {"sum_start": 253789504018298}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__chr10`"],
Expand All @@ -141,7 +141,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__chr11`"],
"expected_result": {"sum_start": 256396593831270}
"expected_result": {"sum_start": 256396597621086}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__chr11`"],
Expand All @@ -153,7 +153,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__chr12`"],
"expected_result": {"sum_start": 245707031530008}
"expected_result": {"sum_start": 245707035186020}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__chr12`"],
Expand All @@ -165,7 +165,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__chr13`"],
"expected_result": {"sum_start": 184996594784522}
"expected_result": {"sum_start": 184996597530522}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__chr13`"],
Expand All @@ -177,7 +177,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__chr14`"],
"expected_result": {"sum_start": 161133805210480}
"expected_result": {"sum_start": 161133807726988}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__chr14`"],
Expand All @@ -189,7 +189,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__chr15`"],
"expected_result": {"sum_start": 143122130090650}
"expected_result": {"sum_start": 143122132351758}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__chr15`"],
Expand All @@ -201,7 +201,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__chr16`"],
"expected_result": {"sum_start": 114391507193184}
"expected_result": {"sum_start": 114391509614422}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__chr16`"],
Expand All @@ -213,7 +213,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__chr17`"],
"expected_result": {"sum_start": 86578003935668}
"expected_result": {"sum_start": 86578006029134}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__chr17`"],
Expand All @@ -225,7 +225,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__chr18`"],
"expected_result": {"sum_start": 88079277266182}
"expected_result": {"sum_start": 88079279443822}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__chr18`"],
Expand All @@ -237,7 +237,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__chr19`"],
"expected_result": {"sum_start": 48834764418176}
"expected_result": {"sum_start": 48834766050406}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__chr19`"],
Expand All @@ -249,7 +249,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__chr20`"],
"expected_result": {"sum_start": 54374276051134}
"expected_result": {"sum_start": 54374277761466}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__chr20`"],
Expand All @@ -261,7 +261,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__chr21`"],
"expected_result": {"sum_start": 32398646080140}
"expected_result": {"sum_start": 32398647118070}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__chr21`"],
Expand All @@ -273,7 +273,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__chr22`"],
"expected_result": {"sum_start": 34479342946504}
"expected_result": {"sum_start": 34479343935160}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__chr22`"],
Expand All @@ -285,7 +285,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__chrX`"],
"expected_result": {"sum_start": 227484009361574}
"expected_result": {"sum_start": 227484012336528}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__chrX`"],
Expand All @@ -297,7 +297,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__chrY`"],
"expected_result": {"sum_start": 328078200660}
"expected_result": {"sum_start": 328078219388}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__chrY`"],
Expand All @@ -309,7 +309,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__residual`"],
"expected_result": {"sum_start": 25257887}
"expected_result": {"sum_start": 25260721}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__residual`"],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
},
{
"query": ["SUM_START_QUERY"],
"expected_result": {"sum_start": 32190612292607}
"expected_result": {"sum_start": 32190612702539}
},
{
"query": ["SUM_END_QUERY"],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
" GROUP BY 1, 2, 3",
")"
],
"expected_result": {"hash_sum": 17184695290}
"expected_result": {"hash_sum": 17184716457}
},
{
"query": [
Expand All @@ -56,7 +56,7 @@
" GROUP BY 1, 2, 3",
")"
],
"expected_result": {"hash_sum": 17180709457}
"expected_result": {"hash_sum": 17180730516}
},
{
"query": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
" GROUP BY 1, 2, 3",
")"
],
"expected_result": {"hash_sum": 17184695290}
"expected_result": {"hash_sum": 17184716457}
},
{
"query": [
Expand All @@ -56,7 +56,7 @@
" GROUP BY 1, 2, 3",
")"
],
"expected_result": {"hash_sum": 17180709457}
"expected_result": {"hash_sum": 17180730516}
},
{
"query": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
" GROUP BY 1, 2, 3",
")"
],
"expected_result": {"hash_sum": 143375297338}
"expected_result": {"hash_sum": 143375343108}
},
{
"query": [
Expand All @@ -57,7 +57,7 @@
" GROUP BY 1, 2, 3",
")"
],
"expected_result": {"hash_sum": 143375297338}
"expected_result": {"hash_sum": 143375343108}
},
{
"query": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
" GROUP BY 1, 2, 3",
")"
],
"expected_result": {"hash_sum": 143375297338}
"expected_result": {"hash_sum": 143375343108}
},
{
"query": [
Expand All @@ -57,7 +57,7 @@
" GROUP BY 1, 2, 3",
")"
],
"expected_result": {"hash_sum": 143375297338}
"expected_result": {"hash_sum": 143375343108}
},
{
"query": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__chr1`"],
"expected_result": {"sum_start": 2932144978501378}
"expected_result": {"sum_start": 2932145002523549}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__chr1`"],
Expand All @@ -28,7 +28,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__chr2`"],
"expected_result": {"sum_start": 2953428836386527}
"expected_result": {"sum_start": 2953428860821187}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__chr2`"],
Expand All @@ -40,7 +40,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__chr3`"],
"expected_result": {"sum_start": 1892873634919601}
"expected_result": {"sum_start": 1892873654063051}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__chr3`"],
Expand All @@ -52,7 +52,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__residual`"],
"expected_result": {"sum_start": 9716663}
"expected_result": {"sum_start": 9717889}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__residual`"],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__chr1`"],
"expected_result": {"sum_start": 2469581012601456}
"expected_result": {"sum_start": 2469581032808147}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__chr1`"],
Expand All @@ -32,7 +32,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__chr2`"],
"expected_result": {"sum_start": 2506042568449967}
"expected_result": {"sum_start": 2506042589162349}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__chr2`"],
Expand All @@ -44,7 +44,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__chr3`"],
"expected_result": {"sum_start": 1605674406003590}
"expected_result": {"sum_start": 1605674422198391}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__chr3`"],
Expand All @@ -56,7 +56,7 @@
},
{
"query": ["SELECT SUM(start_position) AS sum_start FROM `{DATASET_ID}.{TABLE_ID}__residual`"],
"expected_result": {"sum_start": 5056035}
"expected_result": {"sum_start": 5056702}
},
{
"query": ["SELECT SUM(end_position) AS sum_end FROM `{DATASET_ID}.{TABLE_ID}__residual`"],
Expand Down
Loading

0 comments on commit e7dd6da

Please sign in to comment.