Adding upstream version 1.44.3.upstream/1.44.3

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-03-09 13:19:22 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-03-09 13:19:22 +0000
commit: c21c3b0befeb46a51b6bf3758ffa30813bea0ff0 (patch)
tree: 9754ff1ca740f6346cf8483ec915d4054bc5da2d /web/server/h2o/libh2o/deps/klib/lua/bio.lua
parent: Adding upstream version 1.43.2. (diff)
download: netdata-c21c3b0befeb46a51b6bf3758ffa30813bea0ff0.tar.xz
netdata-c21c3b0befeb46a51b6bf3758ffa30813bea0ff0.zip
1 files changed, 149 insertions, 0 deletions
diff --git a/web/server/h2o/libh2o/deps/klib/lua/bio.lua b/web/server/h2o/libh2o/deps/klib/lua/bio.lua
new file mode 100644
index 000000000..c9f220059
--- /dev/null
+++ b/web/server/h2o/libh2o/deps/klib/lua/bio.lua
@@ -0,0 +1,149 @@
+-- bioinformatics routines
+
+-- Description: read a fasta/fastq file
+local function readseq(fp)
+	local finished, last = false, nil;
+	return function()
+		local match;
+		if finished then return nil end
+		if (last == nil) then -- the first record or a record following a fastq
+			for l in fp:lines() do
+				if l:byte(1) == 62 or l:byte(1) == 64 then -- ">" || "@"
+					last = l;
+					break;
+				end
+			end
+			if last == nil then
+				finished = true;
+				return nil;
+			end
+		end
+		local tmp = last:find("%s");
+		name = (tmp and last:sub(2, tmp-1)) or last:sub(2); -- sequence name
+		local seqs = {};
+		local c; -- the first character of the last line
+		last = nil;
+		for l in fp:lines() do -- read sequence
+			c = l:byte(1);
+			if c == 62 or c == 64 or c == 43 then
+				last = l;
+				break;
+			end
+			table.insert(seqs, l);
+		end
+		if last == nil then finished = true end -- end of file
+		if c ~= 43 then return name, table.concat(seqs) end -- a fasta record
+		local seq, len = table.concat(seqs), 0; -- prepare to parse quality
+		seqs = {};
+		for l in fp:lines() do -- read quality
+			table.insert(seqs, l);
+			len = len + #l;
+			if len >= #seq then
+				last = nil;
+				return name, seq, table.concat(seqs);
+			end
+		end
+		finished = true;
+		return name, seq;
+	end
+end
+
+-- extract subsequence from a fasta file indexe by samtools faidx
+local function faidxsub(fn)
+	local fpidx = io.open(fn .. ".fai");
+	if fpidx == nil then
+		io.stderr:write("[faidxsub] fail to open the FASTA index file.\n");
+		return nil
+	end
+	local idx = {};
+	for l in fpidx:lines() do
+		local name, len, offset, line_blen, line_len = l:match("(%S+)%s(%d+)%s(%d+)%s(%d+)%s(%d+)");
+		if name then
+			idx[name] = {tonumber(len), offset, line_blen, line_len};
+		end
+	end
+	fpidx:close();
+	local fp = io.open(fn);
+	return function(name, beg_, end_) -- 0-based coordinate
+		if name == nil then fp:close(); return nil; end
+		if idx[name] then
+			local a = idx[name];
+			beg_ = beg_ or 0;
+			end_ = end_ or a[1];
+			end_ = (end_ <= a[1] and end_) or a[1];
+			local fb, fe = math.floor(beg_ / a[3]), math.floor(end_ / a[3]);
+			local qb, qe = beg_ - fb * a[3], end_ - fe * a[3];
+			fp:seek("set", a[2] + fb * a[4] + qb);
+			local s = fp:read((fe - fb) * a[4] + (qe - qb)):gsub("%s", "");
+			return s;
+		end
+	end
+end
+
+--Description: Index a list of intervals and test if a given interval overlaps with the list
+--Example: lua -lbio -e 'a={{100,201},{200,300},{400,600}};f=bio.intvovlp(a);print(f(600,700))'
+--[[
+  By default, we keep for each tiling 8192 window the interval overlaping the
+  window while having the smallest start position. This method may not work
+  well when most intervals are small but few intervals span a long distance.
+]]--
+local function intvovlp(intv, bits)
+	bits = bits or 13 -- the default bin size is 8192 = 1<<13
+	table.sort(intv, function(a,b) return a[1] < b[1] end) -- sort by the start
+	-- merge intervals; the step speeds up testing, but can be skipped
+	local b, e, k = -1, -1, 1
+	for i = 1, #intv do
+		if e < intv[i][1] then
+			if e >= 0 then intv[k], k = {b, e}, k + 1 end
+			b, e = intv[i][1], intv[i][2]
+		else e = intv[i][2] end
+	end
+	if e >= 0 then intv[k] = {b, e} end
+	while #a > k do table.remove(a) end -- truncate the interval list
+	-- build the index for the list of intervals
+	local idx, size, max = {}, math.pow(2, bits), 0
+	for i = 1, #a do
+		b = math.modf(intv[i][1] / size)
+		e = math.modf(intv[i][2] / size)
+		if b == e then idx[b] = idx[b] or i
+		else for j = b, e do idx[j] = idx[j] or i end end
+		max = (max > e and max) or e
+	end
+	-- return a function (closure)
+	return function(_beg, _end)
+		local x = math.modf(_beg / size)
+		if x > max then return false end
+		local off = idx[x]; -- the start bin
+		if off == nil then -- the following is not the best in efficiency
+			for i = x - 1, 0, -1 do -- find the minimum bin with a value
+				if idx[i] ~= nil then off = idx[i]; break; end
+			end
+			if off == nil then return false end
+		end
+		for i = off, #intv do -- start from off and search for overlaps
+			if intv[i][1] >= _end then return false
+			elseif intv[i][2] > _beg then return true end 
+		end
+		return false
+	end
+end
+
+bio = {
+	readseq = readseq,
+	faidxsub = faidxsub,
+	intvovlp = intvovlp
+}
+
+bio.nt16 = {
+[0]=15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15, 15,15, 5, 6,  8,15, 7, 9,  0,10,15,15, 15,15,15,15,
+	15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15, 15,15, 5, 6,  8,15, 7, 9,  0,10,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
+	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15
+}
+bio.ntcnt  = { [0]=4, 1, 1,  2, 1,  2, 2,  3, 1, 2, 2,  3, 2,  3, 3,  4 }
+bio.ntcomp = { [0]=0, 8, 4, 12, 2, 10, 9, 14, 1, 6, 5, 13, 3, 11, 7, 15 }
+bio.ntrev  = 'XACMGRSVTWYHKDBN'
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-03-09 13:19:22 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-03-09 13:19:22 +0000
commit	c21c3b0befeb46a51b6bf3758ffa30813bea0ff0 (patch)
tree	9754ff1ca740f6346cf8483ec915d4054bc5da2d /web/server/h2o/libh2o/deps/klib/lua/bio.lua
parent	Adding upstream version 1.43.2. (diff)
download	netdata-c21c3b0befeb46a51b6bf3758ffa30813bea0ff0.tar.xz netdata-c21c3b0befeb46a51b6bf3758ffa30813bea0ff0.zip