diff --git a/scripts/6-repeatAlphabet/1-buildAlphabet.sh b/scripts/6-repeatAlphabet/1-buildAlphabet.sh index 63a6564..69a6b0b 100755 --- a/scripts/6-repeatAlphabet/1-buildAlphabet.sh +++ b/scripts/6-repeatAlphabet/1-buildAlphabet.sh @@ -297,10 +297,15 @@ while [ ${ITER} -le ${TANDEM_SPACERS_ITERATIONS} ]; do mv ${READS_TRANSLATED_FILE} ${READS_TRANSLATED_FILE}-preTspacers mv ${READS_TRANSLATED_BOUNDARIES} ${READS_TRANSLATED_BOUNDARIES}-preTspacers mv ${ALPHABET_FILE} ${ALPHABET_FILE}-preTspacers + mv ${FULLY_UNIQUE_FILE} ${FULLY_UNIQUE_FILE}-preTspacers for THREAD in $(seq 0 ${TO}); do cat ${TMPFILE_PATH}-tspacers-12-${THREAD}.txt >> ${READS_TRANSLATED_FILE} cat ${TMPFILE_PATH}-tspacers-13-${THREAD}.txt >> ${READS_TRANSLATED_BOUNDARIES} done + touch ${FULLY_UNIQUE_FILE} + for i in $(sed -n '/^$/=' ${READS_TRANSLATED_FILE}); do + echo $(( $i - 1 )) >> ${FULLY_UNIQUE_FILE} + done mv ${ALPHABET_FILE_SPACERS} ${ALPHABET_FILE} TANDEM_SPACERS_FIXED="1" echo "Tandem spacers fixed" diff --git a/src/de/mpi_cbg/revant/apps/BuildAssemblyGraph.java b/src/de/mpi_cbg/revant/apps/BuildAssemblyGraph.java index 9b0b78f..92a097f 100644 --- a/src/de/mpi_cbg/revant/apps/BuildAssemblyGraph.java +++ b/src/de/mpi_cbg/revant/apps/BuildAssemblyGraph.java @@ -217,7 +217,7 @@ public static void main(String[] args) throws IOException { componentSize = new int[nComponents]; Math.set(componentSize,nComponents-1,0); for (i=0; i=MIN_COMPONENT_SIZE) componentSize[++j]=i; @@ -279,7 +279,7 @@ public static void main(String[] args) throws IOException { componentSize = new int[nComponents]; Math.set(componentSize,nComponents-1,0); for (i=0; i=MIN_COMPONENT_SIZE) componentSize[++j]=i; @@ -317,21 +317,21 @@ public static void main(String[] args) throws IOException { } - private static final void printHistogram(int[] componentSize, int nComponents) { + private static final void printHistogram(int[] componentSize, int nComponents, int nNodes) { int i; double count; int[] tmpArray; - System.err.println("Cumulative distribution of component size:"); + System.err.println("Number of nodes in components of size >=:"); tmpArray = new int[nComponents]; System.arraycopy(componentSize,0,tmpArray,0,nComponents); Arrays.sort(tmpArray); - count=1.0; - for (i=1; i=0; i--) { + if (tmpArray[i]!=tmpArray[i+1]) System.err.println(tmpArray[i+1]+","+(count/nNodes)); + count+=tmpArray[i]; } - System.err.println(tmpArray[nComponents-1]+",1"); + System.err.println(tmpArray[0]+","+(count/nNodes)); } diff --git a/src/de/mpi_cbg/revant/apps/FixTandemSpacers1.java b/src/de/mpi_cbg/revant/apps/FixTandemSpacers1.java index 0056a5a..cc9d509 100644 --- a/src/de/mpi_cbg/revant/apps/FixTandemSpacers1.java +++ b/src/de/mpi_cbg/revant/apps/FixTandemSpacers1.java @@ -54,14 +54,14 @@ public static void main(String[] args) throws IOException { for (int x=0; x<=RepeatAlphabet.lastSpacer; x++) { - if (RepeatAlphabet.spacers[x].read==42) System.err.println("VITTU> 1 "+RepeatAlphabet.spacers[x]); + if (RepeatAlphabet.spacers[x].read==767) System.err.println("VITTU> 1 "+RepeatAlphabet.spacers[x]); } RepeatAlphabet.loadTandemSpacers_blocks(READ_READ_ALIGNMENTS_FILE,DISTANCE_THRESHOLD,LONG_SPACER_LENGTH,NONREPETITIVE_BLOCKS_MODE,tmpArray); for (int x=0; x<=RepeatAlphabet.lastSpacer; x++) { - if (RepeatAlphabet.spacers[x].read==42) System.err.println("VITTU> 2 "+RepeatAlphabet.spacers[x]); + if (RepeatAlphabet.spacers[x].read==767) System.err.println("VITTU> 2 "+RepeatAlphabet.spacers[x]); } @@ -71,7 +71,7 @@ public static void main(String[] args) throws IOException { for (int x=0; x<=RepeatAlphabet.lastSpacer; x++) { - if (RepeatAlphabet.spacers[x].read==42) System.err.println("VITTU> 3 "+RepeatAlphabet.spacers[x]+" lastSpacerNeighbor="+RepeatAlphabet.lastSpacerNeighbor[x]); + if (RepeatAlphabet.spacers[x].read==767) System.err.println("VITTU> 3 "+RepeatAlphabet.spacers[x]+" lastSpacerNeighbor="+RepeatAlphabet.lastSpacerNeighbor[x]); } @@ -79,7 +79,7 @@ public static void main(String[] args) throws IOException { if (!RepeatAlphabet.propagateSolutions(DISTANCE_THRESHOLD_CONSISTENCY)) { System.out.println("3"); return; } for (int x=0; x<=RepeatAlphabet.lastSpacer; x++) { - if (RepeatAlphabet.spacers[x].read==42) System.err.println("VITTU> 4 "+RepeatAlphabet.spacers[x]); + if (RepeatAlphabet.spacers[x].read==767) System.err.println("VITTU> 4 "+RepeatAlphabet.spacers[x]); } diff --git a/src/de/mpi_cbg/revant/apps/RepeatAlphabet.java b/src/de/mpi_cbg/revant/apps/RepeatAlphabet.java index ee84e09..5082830 100644 --- a/src/de/mpi_cbg/revant/apps/RepeatAlphabet.java +++ b/src/de/mpi_cbg/revant/apps/RepeatAlphabet.java @@ -10138,8 +10138,9 @@ public static final void wobble_buildOld2New(Character[] alphabet_old, int lastA * Remark: wobbling is designed to increase the number of edges in a highly * disconnected overlap graph where the endpoints of repeat occurrences are uncertain. * However, such increase in frequency may make some k-mers be classified as repeats - * rather than as unique addresses on the genome, and this might \emph{remove} some - * edges from the overlap graph. + * rather than as unique addresses in the genome, and this might \emph{remove} some + * edges from the overlap graph (of course it might also make rare noisy k-mers become + * frequent enough to be considered unique addresses in the genome). * * Remark: this procedure might put multiple unique characters inside a block that * contains a single unique character. Thus, throughout the code, no test for the