Skip to content

Commit

Permalink
Replace BED score type with Int (#43)
Browse files Browse the repository at this point in the history
* Replaced BED score type with Int
* Added a test for int score parsing
* Added CHANGES, edit the date before release

See issue JetBrains-Research/bioinf-commons#2
  • Loading branch information
dievsky authored May 6, 2019
1 parent 3d0a9e6 commit 5c8ba14
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 28 deletions.
10 changes: 10 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,16 @@ big Changelog

Here you can see the full list of changes between each big release.

Version 0.8.5
-------------

Released on XXXXXX

Fixed
- `ExtendedBedEntry.score` is now Int instead of Short. The reason for this is that many
BED file providers (e.g. MACS2, SICER) don't respect the UCSC standard, which limits the score
to the 0..1000 range, and we want to be able to parse those files.

Version 0.8.4
-------------

Expand Down
59 changes: 32 additions & 27 deletions src/main/kotlin/org/jetbrains/bio/big/Bed.kt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class BedFile(val path: Path) : Iterable<BedEntry>, Closeable {
return reader.lines().map { line ->
val chunks = line.split('\t', limit = 4)
BedEntry(chunks[0], chunks[1].toInt(), chunks[2].toInt(),
if (chunks.size == 3) "" else chunks[3])
if (chunks.size == 3) "" else chunks[3])
}.iterator()
}

Expand Down Expand Up @@ -59,9 +59,12 @@ data class BedEntry(
* @param delimiter Custom delimiter for malformed data
* @param omitEmptyStrings Treat several consecutive separators as one
*/
fun unpack(fieldsNumber: Byte = 12, extraFieldsNumber: Int? = null,
delimiter: Char = '\t',
omitEmptyStrings: Boolean = false): ExtendedBedEntry {
fun unpack(
fieldsNumber: Byte = 12,
extraFieldsNumber: Int? = null,
delimiter: Char = '\t',
omitEmptyStrings: Boolean = false
): ExtendedBedEntry {

check(fieldsNumber in 3..12) { "Fields number expected 3..12, but was $fieldsNumber" }

Expand All @@ -80,7 +83,7 @@ data class BedEntry(
val score = when {
fieldsNumber >= 5 && it.hasNext() -> {
val chunk = it.next()
if (chunk == ".") 0 else chunk.toShort()
if (chunk == ".") 0 else chunk.toInt()
}
else -> 0
}
Expand Down Expand Up @@ -147,10 +150,12 @@ data class BedEntry(
null
}

return ExtendedBedEntry(chrom, start, end,
if (name == "") "." else name,
score, strand, thickStart, thickEnd, color,
blockCount, blockSizes, blockStarts, extraFields)
return ExtendedBedEntry(
chrom, start, end,
if (name == "") "." else name,
score, strand, thickStart, thickEnd, color,
blockCount, blockSizes, blockStarts, extraFields
)
}

private fun String.splitToInts(size: Int): IntArray {
Expand Down Expand Up @@ -179,9 +184,9 @@ data class ExtendedBedEntry(
val end: Int,
/** Name of feature. */
val name: String = ".",
/** A number from [0, 1000] that controls shading of item. */
val score: Short = 0,

// UCSC defines score as an integer in range [0,1000], but almost everyone ignores the range.
/** Feature score */
val score: Int = 0,
/** Strand: '+' or '-', or '.' for unknown. */
val strand: Char = '.',
/** The starting position at which the feature is drawn thickly. **/
Expand Down Expand Up @@ -237,7 +242,7 @@ data class ExtendedBedEntry(
result = 31 * result + start
result = 31 * result + end
result = 31 * result + name.hashCode()
result = 31 * result + score
result = 31 * result + score.hashCode()
result = 31 * result + strand.hashCode()
result = 31 * result + thickStart
result = 31 * result + thickEnd
Expand All @@ -250,18 +255,18 @@ data class ExtendedBedEntry(
}

override fun toString() = MoreObjects.toStringHelper(this)
.add("chrom", chrom)
.add("start", start).add("end", end)
.add("name", name)
.add("score", score)
.add("strand", strand)
.add("thickStart", thickStart).add("thickEnd", thickEnd)
.add("itemRgb", itemRgb)
.add("blocks", when {
blockCount == 0 || blockSizes == null -> "[]"
blockStarts == null -> Arrays.toString(blockSizes)
else -> blockStarts.zip(blockSizes)
}).add("extra", extraFields?.joinToString("\t") ?: "")
.add("chrom", chrom)
.add("start", start).add("end", end)
.add("name", name)
.add("score", score)
.add("strand", strand)
.add("thickStart", thickStart).add("thickEnd", thickEnd)
.add("itemRgb", itemRgb)
.add("blocks", when {
blockCount == 0 || blockSizes == null -> "[]"
blockStarts == null -> Arrays.toString(blockSizes)
else -> blockStarts.zip(blockSizes)
}).add("extra", extraFields?.joinToString("\t") ?: "")
.toString()

/**
Expand All @@ -278,8 +283,8 @@ data class ExtendedBedEntry(
check(fieldsNumber in 3..12) { "Fields number expected 3..12, but was $fieldsNumber" }

return BedEntry(
chrom, start, end,
rest(fieldsNumber, extraFieldsNumber).joinToString(delimiter.toString())
chrom, start, end,
rest(fieldsNumber, extraFieldsNumber).joinToString(delimiter.toString())
)
}

Expand Down
13 changes: 12 additions & 1 deletion src/test/kotlin/org/jetbrains/bio/big/BedEntryTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,17 @@ class BedEntryTest {
)
}

/**
 * Unpacks a BED6+4 entry whose score (40000) does not fit into a Short
 * and checks that it is preserved as an Int alongside the four extra fields.
 */
@Test fun unpackBed6p4IntScore() {
    // Expected result: name ".", score 40000, strand '+', plus four extra columns kept as strings.
    val expected = ExtendedBedEntry(
        "chr1", 1, 100, ".", 40000, '+',
        extraFields = arrayOf("34.56398", "-1.00000", "4.91755", "240")
    )

    // Packed form: everything after chrom/start/end is a single tab-separated string.
    val packed = BedEntry("chr1", 1, 100, ".\t40000\t+\t34.56398\t-1.00000\t4.91755\t240")
    val actual = packed.unpack(fieldsNumber = 6, extraFieldsNumber = 4)

    assertEquals(expected, actual)
}

@Test fun unpackBedEmptyName() {
val bedEntry = BedEntry("chr1", 1, 100, "\t4\t+")
assertEquals(
Expand Down Expand Up @@ -317,7 +328,7 @@ class BedEntryTest {
val actualFields = (0 until 14).map { BED_ENTRY_12_P_2.getField(it, fieldsNumber, extraFieldsNumber) }
val realExtraFieldsNumber = extraFieldsNumber ?: 2
val expectedFields = listOf<Any?>(
"chr1", 10, 30, "be", 5.toShort(), '+', 15, 25, Color(15, 16, 17).rgb,
"chr1", 10, 30, "be", 5, '+', 15, 25, Color(15, 16, 17).rgb,
2, intArrayOf(4, 5), intArrayOf(11, 20)
).slice(0 until fieldsNumber).toMutableList()
expectedFields.addAll(listOf("val1", "4.55").slice(0 until realExtraFieldsNumber))
Expand Down

0 comments on commit 5c8ba14

Please sign in to comment.