diff options
Diffstat (limited to 'test/bench/go1/fasta_test.go')
-rw-r--r-- | test/bench/go1/fasta_test.go | 177 |
1 files changed, 177 insertions, 0 deletions
diff --git a/test/bench/go1/fasta_test.go b/test/bench/go1/fasta_test.go new file mode 100644 index 0000000..f8bfbf4 --- /dev/null +++ b/test/bench/go1/fasta_test.go @@ -0,0 +1,177 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package go1 + +import "runtime" + +// Not a benchmark; input for revcomp. + +func makefasta() []byte { + var n int = 25e6 + if runtime.GOARCH == "arm" || runtime.GOARCH == "mips" || runtime.GOARCH == "mips64" { + // TODO(dfc) remove this limitation after precise gc. + // A value of 25e6 consumes 465mb of heap on 32bit + // platforms, which is too much for some systems. + // A value of 25e5 produces a memory layout that + // confuses the gc on 32bit platforms. So 25e4 it is. + n = 25e4 + } + return fasta(n) +} + +func fasta(n int) []byte { + out := make(fastaBuffer, 0, 11*n) + + iub := []fastaAcid{ + {prob: 0.27, sym: 'a'}, + {prob: 0.12, sym: 'c'}, + {prob: 0.12, sym: 'g'}, + {prob: 0.27, sym: 't'}, + {prob: 0.02, sym: 'B'}, + {prob: 0.02, sym: 'D'}, + {prob: 0.02, sym: 'H'}, + {prob: 0.02, sym: 'K'}, + {prob: 0.02, sym: 'M'}, + {prob: 0.02, sym: 'N'}, + {prob: 0.02, sym: 'R'}, + {prob: 0.02, sym: 'S'}, + {prob: 0.02, sym: 'V'}, + {prob: 0.02, sym: 'W'}, + {prob: 0.02, sym: 'Y'}, + } + + homosapiens := []fastaAcid{ + {prob: 0.3029549426680, sym: 'a'}, + {prob: 0.1979883004921, sym: 'c'}, + {prob: 0.1975473066391, sym: 'g'}, + {prob: 0.3015094502008, sym: 't'}, + } + + alu := []byte( + "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" + + "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" + + "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT" + + "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA" + + "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG" + + "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC" + + "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA") + + out.WriteString(">ONE Homo sapiens alu\n") + fastaRepeat(&out, alu, 2*n) + out.WriteString(">TWO IUB ambiguity codes\n") + fastaRandom(&out, iub, 3*n) + out.WriteString(">THREE Homo sapiens frequency\n") + fastaRandom(&out, homosapiens, 5*n) + return out +} + +type fastaBuffer []byte + +func (b *fastaBuffer) Flush() { + panic("flush") +} + +func (b *fastaBuffer) WriteString(s string) { + p := b.NextWrite(len(s)) + copy(p, s) +} + +func (b *fastaBuffer) NextWrite(n int) []byte { + p := *b + if len(p)+n > cap(p) { + b.Flush() + p = *b + } + out := p[len(p) : len(p)+n] + *b = p[:len(p)+n] + return out +} + +const fastaLine = 60 + +func fastaRepeat(out *fastaBuffer, alu []byte, n int) { + buf := append(alu, alu...) + off := 0 + for n > 0 { + m := n + if m > fastaLine { + m = fastaLine + } + buf1 := out.NextWrite(m + 1) + copy(buf1, buf[off:]) + buf1[m] = '\n' + if off += m; off >= len(alu) { + off -= len(alu) + } + n -= m + } +} + +const ( + fastaLookupSize = 4096 + fastaLookupScale float64 = fastaLookupSize - 1 +) + +var fastaRand uint32 = 42 + +type fastaAcid struct { + sym byte + prob float64 + cprob float64 + next *fastaAcid +} + +func fastaComputeLookup(acid []fastaAcid) *[fastaLookupSize]*fastaAcid { + var lookup [fastaLookupSize]*fastaAcid + var p float64 + for i := range acid { + p += acid[i].prob + acid[i].cprob = p * fastaLookupScale + if i > 0 { + acid[i-1].next = &acid[i] + } + } + acid[len(acid)-1].cprob = 1.0 * fastaLookupScale + + j := 0 + for i := range lookup { + for acid[j].cprob < float64(i) { + j++ + } + lookup[i] = &acid[j] + } + + return &lookup +} + +func fastaRandom(out *fastaBuffer, acid []fastaAcid, n int) { + const ( + IM = 139968 + IA = 3877 + IC = 29573 + ) + lookup := fastaComputeLookup(acid) + for n > 0 { + m := n + if m > fastaLine { + m = fastaLine + } + buf := out.NextWrite(m + 1) + f := fastaLookupScale / IM + myrand := fastaRand + for i := 0; i < m; i++ { + myrand = (myrand*IA + IC) % IM + r := float64(int(myrand)) * f + a := lookup[int(r)] + for a.cprob < r { + a = a.next + } + buf[i] = a.sym + } + fastaRand = myrand + buf[m] = '\n' + n -= m + } +} |