summary refs log tree commit diff
path: root/lua_benchmark/tests/Lua-Benchmarks/regex-dna.lua
diff options
context:
space:
mode:
author Matthew Sotoudeh <matthew@masot.net> 2024-05-17 15:57:30 -0700
committer Matthew Sotoudeh <matthew@masot.net> 2024-05-17 15:57:30 -0700
commit d068f0b3c11348a50c18af1ee3b0d2e5f38c4faf (patch)
tree db777acca2336f8c279e9f09346f02de7ddaa0e9 /lua_benchmark/tests/Lua-Benchmarks/regex-dna.lua
parent 221b05e7a86faa38036429d5fbfc8b0779eb5382 (diff)
lua benchmarks
Diffstat (limited to 'lua_benchmark/tests/Lua-Benchmarks/regex-dna.lua')
-rw-r--r-- lua_benchmark/tests/Lua-Benchmarks/regex-dna.lua 59
1 files changed, 59 insertions, 0 deletions
diff --git a/lua_benchmark/tests/Lua-Benchmarks/regex-dna.lua b/lua_benchmark/tests/Lua-Benchmarks/regex-dna.lua
new file mode 100644
index 0000000..2627e05
--- /dev/null
+++ b/lua_benchmark/tests/Lua-Benchmarks/regex-dna.lua
@@ -0,0 +1,59 @@
+-- The Computer Language Benchmarks Game
+-- http://benchmarksgame.alioth.debian.org/
+-- contributed by Jim Roseborough
+-- modified by Victor Tang
+-- optimized & replaced inefficient use of gsub with gmatch
+-- partitioned sequence to prevent extraneous redundant string copy
+
+-- Read all of standard input and remember its raw length in bytes.
+seq = io.read("*a")
+ilen = #seq
+-- Delete every run that starts at '>' and extends through the following
+-- control characters (presumably FASTA description lines -- confirm), then
+-- delete all remaining control characters such as line breaks. Only the
+-- first result of the chained gsub calls (the string) is kept.
+seq = seq:gsub('>[^%c]*%c*', ''):gsub('%c+', '')
+-- Length of the sequence once headers and control characters are gone.
+clen = #seq
+
+-- Patterns to count in the sequence. Each entry is a pair of Lua patterns
+-- joined by '|'; countmatches() splits on '|' and searches for each half
+-- separately, and the whole entry is also printed as the result label.
+local variants = {
+  'agggtaaa|tttaccct',
+  '[cgt]gggtaaa|tttaccc[acg]',
+  'a[act]ggtaaa|tttacc[agt]t',
+  'ag[act]gtaaa|tttac[agt]ct',
+  'agg[act]taaa|ttta[agt]cct',
+  'aggg[acg]aaa|ttt[cgt]ccct',
+  'agggt[cgt]aa|tt[acg]accct',
+  'agggta[cgt]a|t[acg]taccct',
+  'agggtaa[cgt]|[acg]ttaccct',
+}
+
+-- Replacement text for each single uppercase code letter found in the
+-- sequence (presumably the IUPAC nucleotide ambiguity codes -- confirm).
+-- The key is the text searched for; the value is what it is replaced with.
+local subst = {
+  B = '(c|g|t)',
+  D = '(a|g|t)',
+  H = '(a|c|t)',
+  K = '(g|t)',
+  M = '(a|c)',
+  N = '(a|c|g|t)',
+  R = '(a|g)',
+  S = '(c|g)',
+  V = '(a|c|g)',
+  W = '(a|t)',
+  Y = '(c|t)',
+}
+
+-- Count how often the alternatives of `variant` occur in the global `seq`.
+-- `variant` is a string of Lua patterns separated by '|'. Every non-empty
+-- alternative is scanned over `seq` with gmatch, and the number of matches
+-- found for all alternatives is summed and returned.
+function countmatches(variant)
+  local total = 0
+  for alternative in variant:gmatch('[^|]+') do
+    for _ in seq:gmatch(alternative) do
+      total = total + 1
+    end
+  end
+  return total
+end
+
+-- Print one "<variant> <count>" line per entry of `variants`, in order.
+for index = 1, #variants do
+  local variant = variants[index]
+  io.write(('%s %d\n'):format(variant, countmatches(variant)))
+end
+
+-- Split `seq` into consecutive pieces of floor(sqrt(#seq)) bytes each.
+-- @param seq  the string to split
+-- @return     an array of the pieces in order; joining them with
+--             table.concat gives back `seq`. The last piece may be shorter
+--             than the others, and an empty string yields an empty array.
+function partitionstring(seq)
+  -- For an empty string floor(sqrt(0)) is 0, and a numeric `for` with a
+  -- zero step raises an error in Lua 5.4 and never terminates in earlier
+  -- versions, so the piece length is clamped to at least 1.
+  local seg = math.max(1, math.floor(math.sqrt(#seq)))
+  local seqtable = {}
+  for nextstart = 1, #seq, seg do
+    seqtable[#seqtable + 1] = seq:sub(nextstart, nextstart + seg - 1)
+  end
+  return seqtable
+end
+-- Run gsub(k, v) over every string in the array `t`, updating it in place.
+-- `k` is used as a Lua pattern and `v` is passed to gsub as the replacement.
+-- Entries that contain no match for `k` are left as they are.
+-- Returns `t` itself.
+function chunk_gsub(t, k, v)
+  for index, piece in ipairs(t) do
+    if piece:find(k) then
+      -- Parentheses keep only the new string and drop gsub's match count.
+      t[index] = (piece:gsub(k, v))
+    end
+  end
+  return t
+end
+
+-- Apply every replacement in `subst` to the sequence. The work is done on
+-- the pieces returned by partitionstring rather than on one large string,
+-- and the pieces are joined back together afterwards.
+seq = partitionstring(seq)
+for code, replacement in pairs(subst) do
+  chunk_gsub(seq, code, replacement)
+end
+seq = table.concat(seq)
+-- Blank line, then the raw input length, the stripped sequence length and
+-- the length after all substitutions.
+io.write(('\n%d\n%d\n%d\n'):format(ilen, clen, #seq))
+
generated by cgit on debian on lair
contact matthew@masot.net with questions or feedback