From b5b08d2027f8a1c00a0ceee02f43e219c281b2cb Mon Sep 17 00:00:00 2001 From: Travis Harrison Date: Fri, 27 Apr 2018 10:53:29 -0500 Subject: [PATCH] remove unused ReadRaw functions --- shock-server/node/file/format/fasta/fasta.go | 21 -- shock-server/node/file/format/fastq/fastq.go | 43 +---- shock-server/node/file/format/multi/multi.go | 10 - shock-server/node/file/format/sam/sam.go | 19 -- shock-server/node/file/format/sam/sam_test.go | 179 ------------------ shock-server/node/file/format/seq/seq.go | 2 - 6 files changed, 1 insertion(+), 273 deletions(-) delete mode 100644 shock-server/node/file/format/sam/sam_test.go diff --git a/shock-server/node/file/format/fasta/fasta.go b/shock-server/node/file/format/fasta/fasta.go index db7c2197..398df26f 100644 --- a/shock-server/node/file/format/fasta/fasta.go +++ b/shock-server/node/file/format/fasta/fasta.go @@ -61,27 +61,6 @@ func (self *Reader) Read() (sequence *seq.Seq, err error) { return } -// Read a single sequence and return it or an error. -func (self *Reader) ReadRaw(p []byte) (n int, err error) { - if self.r == nil { - self.r = bufio.NewReader(self.f) - } - p[n] = byte('>') - n = 1 - for { - read, er := self.r.ReadBytes('>') - if len(read) > 1 { - copy(p[n:n+len(read)-1], read[0:len(read)-1]) - n += len(read) - 1 - break - } else if er != nil { - err = er - break - } - } - return -} - // Read a single sequence and return read offset for indexing. func (self *Reader) GetReadOffset() (n int, err error) { if self.r == nil { diff --git a/shock-server/node/file/format/fastq/fastq.go b/shock-server/node/file/format/fastq/fastq.go index b1476013..5f9d18ce 100644 --- a/shock-server/node/file/format/fastq/fastq.go +++ b/shock-server/node/file/format/fastq/fastq.go @@ -17,7 +17,7 @@ import ( ) var ( - Regex = regexp.MustCompile(`^[\n\r]*@[\S\t ]+[\n\r]+[A-Za-z\-]+[\n\r]+\+[\S\t ]*[\n\r]+\S*[\n\r]+`) + Regex = regexp.MustCompile(`^[\n\r]*@\S+[\S\t ]+[\n\r]+[A-Za-z\-]+[\n\r]+\+[\S\t ]*[\n\r]+\S*[\n\r]+`) ) // Fastq sequence format reader type. @@ -97,47 +97,6 @@ READ: return } -func (self *Reader) ReadRaw(p []byte) (n int, err error) { - if self.r == nil { - self.r = bufio.NewReader(self.f) - } - curr := 0 - id, err := self.r.ReadBytes('\n') - if err != nil { - return 0, err - } else if !bytes.HasPrefix(id, []byte{'@'}) { - return 0, errors.New("Invalid format: id line does not start with @") - } - copy(p[curr:len(id)+curr], id) - curr += len(id) - - seq, err := self.r.ReadBytes('\n') - if err != nil { - return 0, err - } - copy(p[curr:len(seq)+curr], seq) - curr += len(seq) - - plus, err := self.r.ReadBytes('\n') - if err != nil { - return 0, err - } else if !bytes.HasPrefix(plus, []byte{'+'}) { - return 0, errors.New("Invalid format: plus line does not start with +") - } - copy(p[curr:len(plus)+curr], plus) - curr += len(plus) - - qual, err := self.r.ReadBytes('\n') - if err != nil { - return 0, err - } else if len(seq) != len(qual) { - return 0, errors.New("Invalid format: length of sequence and quality lines do not match") - } - copy(p[curr:len(qual)+curr], qual) - n = curr + len(qual) - return -} - // Read a single sequence and return read offset for indexing. func (self *Reader) GetReadOffset() (n int, err error) { if self.r == nil { diff --git a/shock-server/node/file/format/multi/multi.go b/shock-server/node/file/format/multi/multi.go index 1e812a6c..964ead23 100644 --- a/shock-server/node/file/format/multi/multi.go +++ b/shock-server/node/file/format/multi/multi.go @@ -71,16 +71,6 @@ func (r *Reader) Read() (*seq.Seq, error) { return r.r.Read() } -func (r *Reader) ReadRaw(p []byte) (n int, err error) { - if r.r == nil { - err := r.DetermineFormat() - if err != nil { - return 0, err - } - } - return r.r.ReadRaw(p) -} - func (r *Reader) GetReadOffset() (n int, err error) { if r.r == nil { err := r.DetermineFormat() diff --git a/shock-server/node/file/format/sam/sam.go b/shock-server/node/file/format/sam/sam.go index 4c95ed1d..fe28bbb2 100644 --- a/shock-server/node/file/format/sam/sam.go +++ b/shock-server/node/file/format/sam/sam.go @@ -78,25 +78,6 @@ func (self *Reader) Read() (sequence *seq.Seq, err error) { return } -// Read a single sequence and return it or an error. (used for making record index) -func (self *Reader) ReadRaw(p []byte) (n int, err error) { - for { - read, er := self.r.ReadBytes('\n') - n += len(read) - if len(read) > 1 { - if read[0] == '@' { - continue - } - copy(p[0:len(read)], read[0:len(read)]) - break - } else if er != nil { - err = er - break - } - } - return -} - // Read a single sequence and return read offset for indexing. func (self *Reader) GetReadOffset() (n int, err error) { for { diff --git a/shock-server/node/file/format/sam/sam_test.go b/shock-server/node/file/format/sam/sam_test.go deleted file mode 100644 index d5018e6f..00000000 --- a/shock-server/node/file/format/sam/sam_test.go +++ /dev/null @@ -1,179 +0,0 @@ -package sam - -import ( - "fmt" - "io" - "io/ioutil" - "os" - "testing" -) - -var ( - gopath = os.Getenv("GOPATH") - sample = gopath + "/src/github.com/MG-RAST/Shock/shock-server/testdata/sample1.sam" - Idx [][]int64 //list of {offset, length} pair -) - -func TestValid(t *testing.T) { - f, _ := ioutil.ReadFile(sample) - println("valid") - println(Regex.Match(f)) - println("invalid:") - for _, s := range invalid { - println(Regex.MatchString(s)) - } -} - -var invalid = []string{`>S1 rank=0000056 x=2202.0 y=484.0 length=288 -TGAATGTATTCCAGTAAACCGCCCGCGCAAGTAGGCTTCAAATGCCTGCACATTGTCCGTGCCGCGTTTTCAAAGTTTCTGTTCTTCGCCCGAAAGAATAGGAAGAATTGATTTGGCGACCTGTTCGGAAACAATATCTTCAAGCGTCAAAACGTCGCTGAATTTTCATCAAAGGTCTTCGCCCAACACGTCGAATTATCCTTGACGCTCAAAAGCTGCGCTGAAATGCGAATTCTATCACCGACGCGGCGGAGATTACCGTCAAGAATAAAATCAACGCCGAGTTCG`, - `@SEQ_ID -GATTTGGGGTTCAAAGCAGTATCGATCAAATAGTAAATCCATTTGTTCAACTCACAGTTT -+ -!''*((((***+))%%%++)(%%%%).1***-+*''))**55CCF>>>>>>CCCCCCC65 -@SEQ_ID -GATTTGGGGTTCAAAGCAGTATCGATCAAATAGTAAATCCATTTGTTCAACTCACAGTTT -+ -!''*((((***+))%%%++)(%%%%).1***-+*''))**55CCF>>>>>>CCCCCCC65`, `@SEQ_ID -GATTTGGGGTTCAAAGCAGTATCGATCAAATAGTAAATCCATTTGTTCAACTCACAGTTT -+junkhhere -!''*((((***+))%%%++)(%%%%).1***-+*''))**55CCF>>>>>>CCCCCCC65 -@SEQ_ID -GATTTGGGGTTCAAAGCAGTATCGATCAAATAGTAAATCCATTTGTTCAACTCACAGTTT -+junkhhere -!''*((((***+))%%%++)(%%%%).1***-+*''))**55CCF>>>>>>CCCCCCC65`} - -func TestRead(t *testing.T) { - var ( - obtainN [][]byte - obtainS [][]byte - ) - - if r, err := NewReaderName(sample); err != nil { - t.Errorf("Failed to open test file %s: %v", sample, err.Error()) - } else { - for i := 0; i < 2; i++ { - var linect int - for { - if s, err := r.Read(); err != nil { - if err == io.EOF { - break - } else { - t.Errorf("Failed to read %s: %v", sample, err.Error()) - } - } else { - fmt.Println(i + 1) - obtainN = append(obtainN, s.ID) - obtainS = append(obtainS, s.Seq) - linect += 1 - //fmt.Printf("line %d = %s\n", linect, s.Seq) - } - } - obtainN = nil - obtainS = nil - if err = r.Rewind(); err != nil { - t.Errorf("Failed to rewind %s: %v", sample, err.Error()) - } - } - //r.Close() - } -} - -func TestReadRaw(t *testing.T) { - if r, err := NewReaderName(sample); err != nil { - t.Errorf("Failed to open test file %s: %v", sample, err.Error()) - } else { - for i := 0; i < 2; i++ { - var linect int - for { - buf := make([]byte, 32*1024) - - if n, err := r.ReadRaw(buf); err != nil { - if err == io.EOF { - break - } else { - t.Errorf("Fail to read in TestReadRaw() %s: %v", sample, err.Error()) - } - } else { - linect += 1 - fmt.Printf("line=%d, length=%d, line_content=%s\n", linect, n, buf) - } - } - - if err = r.Rewind(); err != nil { - t.Errorf("Failed to rewind %s: %v", sample, err.Error()) - } - } - //r.Close() - } -} - -func TestCreateIndex(t *testing.T) { - curr := int64(0) - - if r, err := NewReaderName(sample); err != nil { - t.Errorf("Failed to open test file %s: %v", sample, err.Error()) - } else { - for { - buf := make([]byte, 32*1024) - - if n, err := r.ReadRaw(buf); err != nil { - if err == io.EOF { - break - } else { - t.Errorf("Fail to read in TestCreatIndex() %s: %v", sample, err.Error()) - } - } else { - Idx = append(Idx, []int64{curr, int64(n)}) - curr += int64(n) - } - } - - if err = r.Rewind(); err != nil { - t.Errorf("Failed to rewind %s: %v", sample, err.Error()) - } - - fmt.Printf("indices= %v", Idx) - //r.Close() - } - return -} - -func TestReadSeqByIndex(t *testing.T) { - rs := make([]*io.SectionReader, 1000) - - if fd, err := os.Open(sample); err != nil { - t.Errorf("Failed to open test file %s: %v", sample, err.Error()) - } else { - for i := 1; i <= len(Idx); i++ { - pos := Idx[i-1][0] - length := Idx[i-1][1] - fmt.Printf("record %d: reading from pos=%d for length %d\n", i, pos, length) - if err != nil { - t.Errorf("invalid index part %d: %v", i, err.Error()) - return - } - rs = append(rs, io.NewSectionReader(fd, pos, length)) - } - - i := 1 - for _, sec_reader := range rs { - if sec_reader != nil { - buf := make([]byte, 32*1024) - if n, err := sec_reader.ReadAt(buf, 0); err != nil { - if err == io.EOF { - break - } else { - t.Errorf("Fail to read in TestReadSeqByIndex() %s: %v", sample, err.Error()) - } - } else { - fmt.Printf("record=%d, size=%d, seq=%s\n", i, n, buf) - i += 1 - } - - } - } - - fd.Close() - } - return -} diff --git a/shock-server/node/file/format/seq/seq.go b/shock-server/node/file/format/seq/seq.go index 9a29857e..3331b598 100644 --- a/shock-server/node/file/format/seq/seq.go +++ b/shock-server/node/file/format/seq/seq.go @@ -26,14 +26,12 @@ type ReadFormater interface { type Reader interface { Read() (*Seq, error) - ReadRaw(p []byte) (int, error) GetReadOffset() (int, error) SeekChunk(int64) (int64, error) } type ReadRewinder interface { Read() (*Seq, error) - ReadRaw(p []byte) (int, error) GetReadOffset() (int, error) SeekChunk(int64) (int64, error) Rewind() error