summaryrefslogtreecommitdiffstats
path: root/test/bench/go1/fasta_test.go
diff options
context:
space:
mode:
Diffstat (limited to 'test/bench/go1/fasta_test.go')
-rw-r--r--test/bench/go1/fasta_test.go177
1 files changed, 177 insertions, 0 deletions
diff --git a/test/bench/go1/fasta_test.go b/test/bench/go1/fasta_test.go
new file mode 100644
index 0000000..f8bfbf4
--- /dev/null
+++ b/test/bench/go1/fasta_test.go
@@ -0,0 +1,177 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package go1
+
+import "runtime"
+
+// Not a benchmark; input for revcomp.
+
+func makefasta() []byte {
+ var n int = 25e6
+ if runtime.GOARCH == "arm" || runtime.GOARCH == "mips" || runtime.GOARCH == "mips64" {
+ // TODO(dfc) remove this limitation after precise gc.
+ // A value of 25e6 consumes 465mb of heap on 32bit
+ // platforms, which is too much for some systems.
+ // A value of 25e5 produces a memory layout that
+ // confuses the gc on 32bit platforms. So 25e4 it is.
+ n = 25e4
+ }
+ return fasta(n)
+}
+
+func fasta(n int) []byte {
+ out := make(fastaBuffer, 0, 11*n)
+
+ iub := []fastaAcid{
+ {prob: 0.27, sym: 'a'},
+ {prob: 0.12, sym: 'c'},
+ {prob: 0.12, sym: 'g'},
+ {prob: 0.27, sym: 't'},
+ {prob: 0.02, sym: 'B'},
+ {prob: 0.02, sym: 'D'},
+ {prob: 0.02, sym: 'H'},
+ {prob: 0.02, sym: 'K'},
+ {prob: 0.02, sym: 'M'},
+ {prob: 0.02, sym: 'N'},
+ {prob: 0.02, sym: 'R'},
+ {prob: 0.02, sym: 'S'},
+ {prob: 0.02, sym: 'V'},
+ {prob: 0.02, sym: 'W'},
+ {prob: 0.02, sym: 'Y'},
+ }
+
+ homosapiens := []fastaAcid{
+ {prob: 0.3029549426680, sym: 'a'},
+ {prob: 0.1979883004921, sym: 'c'},
+ {prob: 0.1975473066391, sym: 'g'},
+ {prob: 0.3015094502008, sym: 't'},
+ }
+
+ alu := []byte(
+ "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" +
+ "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" +
+ "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT" +
+ "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA" +
+ "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG" +
+ "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC" +
+ "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA")
+
+ out.WriteString(">ONE Homo sapiens alu\n")
+ fastaRepeat(&out, alu, 2*n)
+ out.WriteString(">TWO IUB ambiguity codes\n")
+ fastaRandom(&out, iub, 3*n)
+ out.WriteString(">THREE Homo sapiens frequency\n")
+ fastaRandom(&out, homosapiens, 5*n)
+ return out
+}
+
+type fastaBuffer []byte
+
+func (b *fastaBuffer) Flush() {
+ panic("flush")
+}
+
+func (b *fastaBuffer) WriteString(s string) {
+ p := b.NextWrite(len(s))
+ copy(p, s)
+}
+
+func (b *fastaBuffer) NextWrite(n int) []byte {
+ p := *b
+ if len(p)+n > cap(p) {
+ b.Flush()
+ p = *b
+ }
+ out := p[len(p) : len(p)+n]
+ *b = p[:len(p)+n]
+ return out
+}
+
+const fastaLine = 60
+
+func fastaRepeat(out *fastaBuffer, alu []byte, n int) {
+ buf := append(alu, alu...)
+ off := 0
+ for n > 0 {
+ m := n
+ if m > fastaLine {
+ m = fastaLine
+ }
+ buf1 := out.NextWrite(m + 1)
+ copy(buf1, buf[off:])
+ buf1[m] = '\n'
+ if off += m; off >= len(alu) {
+ off -= len(alu)
+ }
+ n -= m
+ }
+}
+
+const (
+ fastaLookupSize = 4096
+ fastaLookupScale float64 = fastaLookupSize - 1
+)
+
+var fastaRand uint32 = 42
+
+type fastaAcid struct {
+ sym byte
+ prob float64
+ cprob float64
+ next *fastaAcid
+}
+
+func fastaComputeLookup(acid []fastaAcid) *[fastaLookupSize]*fastaAcid {
+ var lookup [fastaLookupSize]*fastaAcid
+ var p float64
+ for i := range acid {
+ p += acid[i].prob
+ acid[i].cprob = p * fastaLookupScale
+ if i > 0 {
+ acid[i-1].next = &acid[i]
+ }
+ }
+ acid[len(acid)-1].cprob = 1.0 * fastaLookupScale
+
+ j := 0
+ for i := range lookup {
+ for acid[j].cprob < float64(i) {
+ j++
+ }
+ lookup[i] = &acid[j]
+ }
+
+ return &lookup
+}
+
+func fastaRandom(out *fastaBuffer, acid []fastaAcid, n int) {
+ const (
+ IM = 139968
+ IA = 3877
+ IC = 29573
+ )
+ lookup := fastaComputeLookup(acid)
+ for n > 0 {
+ m := n
+ if m > fastaLine {
+ m = fastaLine
+ }
+ buf := out.NextWrite(m + 1)
+ f := fastaLookupScale / IM
+ myrand := fastaRand
+ for i := 0; i < m; i++ {
+ myrand = (myrand*IA + IC) % IM
+ r := float64(int(myrand)) * f
+ a := lookup[int(r)]
+ for a.cprob < r {
+ a = a.next
+ }
+ buf[i] = a.sym
+ }
+ fastaRand = myrand
+ buf[m] = '\n'
+ n -= m
+ }
+}