8 #ifndef BTLLIB_NTHASH_HPP
9 #define BTLLIB_NTHASH_HPP
// Mask ANDed with a character code before table lookups on the reverse-strand
// paths. It keeps the low three bits, which for ASCII A/C/G/T/U (either case)
// selects the slot in the lookup tables that holds the complementary base.
19 const uint8_t cp_off = 0x07;
// Right-shift distance used when deriving extra hashes
// (t_val ^= t_val >> multishift).
22 const int multishift = 27;
// Multiplier combined with k and the hash index when deriving extra hashes.
25 static const uint64_t multiseed = 0x90b45d39fb6da1fa;
// 64-bit seeds, one per nucleotide.
28 static const uint64_t seedA = 0x3c8bfbb395c60474;
29 static const uint64_t seedC = 0x3193c18562a02b4c;
30 static const uint64_t seedG = 0x20323ed082572324;
31 static const uint64_t seedT = 0x295549f54be24456;
// Seed for every character that is not a recognised base; the hashing loops
// compare table entries against it to detect such characters.
32 static const uint64_t seedN = 0x0000000000000000;
// Number of entries in each character-indexed lookup table.
34 static const int ASCII_SIZE = 256;
// Character-indexed seed table (index = unsigned char value).
// - Slots 65/67/71/84/85 ('A','C','G','T','U') and 97/99/103/116/117 (lower
//   case) hold the seed of that base, with U treated as T.
// - Slots 0..7 hold the seeds reached through `code & cp_off`, i.e. the seed
//   of the complementary base (e.g. 'A' & 7 == 1 -> seedT).
// - Every other slot is seedN.
36 static const uint64_t seed_tab[ASCII_SIZE] = {
37 seedN, seedT, seedN, seedG, seedA, seedA, seedN, seedC,
38 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN,
39 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN,
40 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN,
41 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN,
42 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN,
43 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN,
44 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN,
45 seedN, seedA, seedN, seedC, seedN, seedN, seedN, seedG,
46 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN,
47 seedN, seedN, seedN, seedN, seedT, seedT, seedN, seedN,
48 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN,
49 seedN, seedA, seedN, seedC, seedN, seedN, seedN, seedG,
50 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN,
51 seedN, seedN, seedN, seedN, seedT, seedT, seedN, seedN,
52 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN,
53 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN,
54 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN,
55 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN,
56 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN,
57 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN,
58 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN,
59 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN,
60 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN,
61 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN,
62 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN,
63 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN,
64 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN,
65 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN,
66 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN,
67 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN,
68 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN
// 33-entry table for base A, selected through ms_tab_33r and indexed with an
// offset taken modulo 33. Entry 0 is the low 33 bits of seedA and entry 1 is
// that value rotated left by one bit within 33 bits; the remaining entries
// appear to continue the same rotation (TODO confirm).
71 static const uint64_t A33r[33] = {
72 0x195c60474, 0x12b8c08e9, 0x571811d3, 0xae3023a6, 0x15c60474c, 0xb8c08e99,
73 0x171811d32, 0xe3023a65, 0x1c60474ca, 0x18c08e995, 0x11811d32b, 0x3023a657,
74 0x60474cae, 0xc08e995c, 0x1811d32b8, 0x1023a6571, 0x474cae3, 0x8e995c6,
75 0x11d32b8c, 0x23a65718, 0x474cae30, 0x8e995c60, 0x11d32b8c0, 0x3a657181,
76 0x74cae302, 0xe995c604, 0x1d32b8c08, 0x1a6571811, 0x14cae3023, 0x995c6047,
77 0x132b8c08e, 0x6571811d, 0xcae3023a
// 31-entry table for base A, selected through ms_tab_31l and indexed with an
// offset taken modulo 31. Entry 0 is seedA with its low 33 bits cleared and
// entry 1 is that upper 31-bit field rotated left by one bit; the remaining
// entries appear to continue the same rotation (TODO confirm).
// NOTE(review): only 30 of the 31 declared entries are visible in this listing.
80 static const uint64_t A31l[31] = {
81 0x3c8bfbb200000000, 0x7917f76400000000, 0xf22feec800000000,
82 0xe45fdd9200000000, 0xc8bfbb2600000000, 0x917f764e00000000,
83 0x22feec9e00000000, 0x45fdd93c00000000, 0x8bfbb27800000000,
84 0x17f764f200000000, 0x2feec9e400000000, 0x5fdd93c800000000,
85 0xbfbb279000000000, 0x7f764f2200000000, 0xfeec9e4400000000,
86 0xfdd93c8a00000000, 0xfbb2791600000000, 0xf764f22e00000000,
87 0xeec9e45e00000000, 0xdd93c8be00000000, 0xbb27917e00000000,
88 0x764f22fe00000000, 0xec9e45fc00000000, 0xd93c8bfa00000000,
89 0xb27917f600000000, 0x64f22fee00000000, 0xc9e45fdc00000000,
90 0x93c8bfba00000000, 0x27917f7600000000, 0x4f22feec00000000,
// 33-entry table for base C, selected through ms_tab_33r and indexed with an
// offset taken modulo 33. Entry 0 is the low 33 bits of seedC and entry 1 is
// that value rotated left by one bit within 33 bits; the remaining entries
// appear to continue the same rotation (TODO confirm).
94 static const uint64_t C33r[33] = {
95 0x162a02b4c, 0xc5405699, 0x18a80ad32, 0x115015a65, 0x2a02b4cb, 0x54056996,
96 0xa80ad32c, 0x15015a658, 0xa02b4cb1, 0x140569962, 0x80ad32c5, 0x1015a658a,
97 0x2b4cb15, 0x569962a, 0xad32c54, 0x15a658a8, 0x2b4cb150, 0x569962a0,
98 0xad32c540, 0x15a658a80, 0xb4cb1501, 0x169962a02, 0xd32c5405, 0x1a658a80a,
99 0x14cb15015, 0x9962a02b, 0x132c54056, 0x658a80ad, 0xcb15015a, 0x1962a02b4,
100 0x12c540569, 0x58a80ad3, 0xb15015a6
// 31-entry table for base C, selected through ms_tab_31l and indexed with an
// offset taken modulo 31. Entry 0 is seedC with its low 33 bits cleared and
// entry 1 is that upper 31-bit field rotated left by one bit; the remaining
// entries appear to continue the same rotation (TODO confirm).
// NOTE(review): only 30 of the 31 declared entries are visible in this listing.
103 static const uint64_t C31l[31] = {
104 0x3193c18400000000, 0x6327830800000000, 0xc64f061000000000,
105 0x8c9e0c2200000000, 0x193c184600000000, 0x3278308c00000000,
106 0x64f0611800000000, 0xc9e0c23000000000, 0x93c1846200000000,
107 0x278308c600000000, 0x4f06118c00000000, 0x9e0c231800000000,
108 0x3c18463200000000, 0x78308c6400000000, 0xf06118c800000000,
109 0xe0c2319200000000, 0xc184632600000000, 0x8308c64e00000000,
110 0x6118c9e00000000, 0xc23193c00000000, 0x1846327800000000,
111 0x308c64f000000000, 0x6118c9e000000000, 0xc23193c000000000,
112 0x8463278200000000, 0x8c64f0600000000, 0x118c9e0c00000000,
113 0x23193c1800000000, 0x4632783000000000, 0x8c64f06000000000,
// 33-entry table for base G, selected through ms_tab_33r and indexed with an
// offset taken modulo 33. Entry 0 is the low 33 bits of seedG and entry 1 is
// that value rotated left by one bit within 33 bits; the remaining entries
// appear to continue the same rotation (TODO confirm).
117 static const uint64_t G33r[33] = {
118 0x82572324, 0x104ae4648, 0x95c8c91, 0x12b91922, 0x25723244, 0x4ae46488,
119 0x95c8c910, 0x12b919220, 0x57232441, 0xae464882, 0x15c8c9104, 0xb9192209,
120 0x172324412, 0xe4648825, 0x1c8c9104a, 0x191922095, 0x12324412b, 0x46488257,
121 0x8c9104ae, 0x11922095c, 0x324412b9, 0x64882572, 0xc9104ae4, 0x1922095c8,
122 0x124412b91, 0x48825723, 0x9104ae46, 0x122095c8c, 0x4412b919, 0x88257232,
123 0x1104ae464, 0x2095c8c9, 0x412b9192
// 31-entry table for base G, selected through ms_tab_31l and indexed with an
// offset taken modulo 31. Entry 0 is seedG with its low 33 bits cleared and
// entry 1 is that upper 31-bit field rotated left by one bit; the remaining
// entries appear to continue the same rotation (TODO confirm).
// NOTE(review): only 30 of the 31 declared entries are visible in this listing.
126 static const uint64_t G31l[31] = {
127 0x20323ed000000000, 0x40647da000000000, 0x80c8fb4000000000,
128 0x191f68200000000, 0x323ed0400000000, 0x647da0800000000,
129 0xc8fb41000000000, 0x191f682000000000, 0x323ed04000000000,
130 0x647da08000000000, 0xc8fb410000000000, 0x91f6820200000000,
131 0x23ed040600000000, 0x47da080c00000000, 0x8fb4101800000000,
132 0x1f68203200000000, 0x3ed0406400000000, 0x7da080c800000000,
133 0xfb41019000000000, 0xf682032200000000, 0xed04064600000000,
134 0xda080c8e00000000, 0xb410191e00000000, 0x6820323e00000000,
135 0xd040647c00000000, 0xa080c8fa00000000, 0x410191f600000000,
136 0x820323ec00000000, 0x40647da00000000, 0x80c8fb400000000,
// 33-entry table for base T, selected through ms_tab_33r and indexed with an
// offset taken modulo 33. Entry 0 is the low 33 bits of seedT and entry 1 is
// that value rotated left by one bit within 33 bits; the remaining entries
// appear to continue the same rotation (TODO confirm).
140 static const uint64_t T33r[33] = {
141 0x14be24456, 0x97c488ad, 0x12f89115a, 0x5f1222b5, 0xbe24456a, 0x17c488ad4,
142 0xf89115a9, 0x1f1222b52, 0x1e24456a5, 0x1c488ad4b, 0x189115a97, 0x11222b52f,
143 0x24456a5f, 0x488ad4be, 0x9115a97c, 0x1222b52f8, 0x4456a5f1, 0x88ad4be2,
144 0x1115a97c4, 0x22b52f89, 0x456a5f12, 0x8ad4be24, 0x115a97c48, 0x2b52f891,
145 0x56a5f122, 0xad4be244, 0x15a97c488, 0xb52f8911, 0x16a5f1222, 0xd4be2445,
146 0x1a97c488a, 0x152f89115, 0xa5f1222b
// 31-entry table for base T, selected through ms_tab_31l and indexed with an
// offset taken modulo 31. Entry 0 is seedT with its low 33 bits cleared and
// entry 1 is that upper 31-bit field rotated left by one bit; the remaining
// entries appear to continue the same rotation (TODO confirm).
// NOTE(review): only 30 of the 31 declared entries are visible in this listing.
149 static const uint64_t T31l[31] = {
150 0x295549f400000000, 0x52aa93e800000000, 0xa55527d000000000,
151 0x4aaa4fa200000000, 0x95549f4400000000, 0x2aa93e8a00000000,
152 0x55527d1400000000, 0xaaa4fa2800000000, 0x5549f45200000000,
153 0xaa93e8a400000000, 0x5527d14a00000000, 0xaa4fa29400000000,
154 0x549f452a00000000, 0xa93e8a5400000000, 0x527d14aa00000000,
155 0xa4fa295400000000, 0x49f452aa00000000, 0x93e8a55400000000,
156 0x27d14aaa00000000, 0x4fa2955400000000, 0x9f452aa800000000,
157 0x3e8a555200000000, 0x7d14aaa400000000, 0xfa29554800000000,
158 0xf452aa9200000000, 0xe8a5552600000000, 0xd14aaa4e00000000,
159 0xa295549e00000000, 0x452aa93e00000000, 0x8a55527c00000000,
// 33-entry table used for every character that is not a recognised base:
// all entries are seedN (zero), so XOR-ing with any entry changes nothing.
163 static const uint64_t N33r[33] = {
164 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN,
165 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN,
166 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN
// 31-entry counterpart of N33r: all entries are seedN (zero).
169 static const uint64_t N31l[31] = {
170 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN,
171 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN,
172 seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN, seedN
// Character-indexed table of pointers to the 33-entry per-base tables.
// The layout mirrors seed_tab: slots for A/C/G/T/U (both cases) point at that
// base's table, slots 0..7 point at the table reached through
// `code & cp_off` (the complementary base), and all other slots point at N33r.
175 static const uint64_t* ms_tab_33r[ASCII_SIZE] = {
176 N33r, T33r, N33r, G33r, A33r, A33r, N33r, C33r,
177 N33r, N33r, N33r, N33r, N33r, N33r, N33r, N33r,
178 N33r, N33r, N33r, N33r, N33r, N33r, N33r, N33r,
179 N33r, N33r, N33r, N33r, N33r, N33r, N33r, N33r,
180 N33r, N33r, N33r, N33r, N33r, N33r, N33r, N33r,
181 N33r, N33r, N33r, N33r, N33r, N33r, N33r, N33r,
182 N33r, N33r, N33r, N33r, N33r, N33r, N33r, N33r,
183 N33r, N33r, N33r, N33r, N33r, N33r, N33r, N33r,
184 N33r, A33r, N33r, C33r, N33r, N33r, N33r, G33r,
185 N33r, N33r, N33r, N33r, N33r, N33r, N33r, N33r,
186 N33r, N33r, N33r, N33r, T33r, T33r, N33r, N33r,
187 N33r, N33r, N33r, N33r, N33r, N33r, N33r, N33r,
188 N33r, A33r, N33r, C33r, N33r, N33r, N33r, G33r,
189 N33r, N33r, N33r, N33r, N33r, N33r, N33r, N33r,
190 N33r, N33r, N33r, N33r, T33r, T33r, N33r, N33r,
191 N33r, N33r, N33r, N33r, N33r, N33r, N33r, N33r,
192 N33r, N33r, N33r, N33r, N33r, N33r, N33r, N33r,
193 N33r, N33r, N33r, N33r, N33r, N33r, N33r, N33r,
194 N33r, N33r, N33r, N33r, N33r, N33r, N33r, N33r,
195 N33r, N33r, N33r, N33r, N33r, N33r, N33r, N33r,
196 N33r, N33r, N33r, N33r, N33r, N33r, N33r, N33r,
197 N33r, N33r, N33r, N33r, N33r, N33r, N33r, N33r,
198 N33r, N33r, N33r, N33r, N33r, N33r, N33r, N33r,
199 N33r, N33r, N33r, N33r, N33r, N33r, N33r, N33r,
200 N33r, N33r, N33r, N33r, N33r, N33r, N33r, N33r,
201 N33r, N33r, N33r, N33r, N33r, N33r, N33r, N33r,
202 N33r, N33r, N33r, N33r, N33r, N33r, N33r, N33r,
203 N33r, N33r, N33r, N33r, N33r, N33r, N33r, N33r,
204 N33r, N33r, N33r, N33r, N33r, N33r, N33r, N33r,
205 N33r, N33r, N33r, N33r, N33r, N33r, N33r, N33r,
206 N33r, N33r, N33r, N33r, N33r, N33r, N33r, N33r,
207 N33r, N33r, N33r, N33r, N33r, N33r, N33r, N33r
// Character-indexed table of pointers to the 31-entry per-base tables.
// The layout mirrors seed_tab: slots for A/C/G/T/U (both cases) point at that
// base's table, slots 0..7 point at the table reached through
// `code & cp_off` (the complementary base), and all other slots point at N31l.
210 static const uint64_t* ms_tab_31l[ASCII_SIZE] = {
211 N31l, T31l, N31l, G31l, A31l, A31l, N31l, C31l,
212 N31l, N31l, N31l, N31l, N31l, N31l, N31l, N31l,
213 N31l, N31l, N31l, N31l, N31l, N31l, N31l, N31l,
214 N31l, N31l, N31l, N31l, N31l, N31l, N31l, N31l,
215 N31l, N31l, N31l, N31l, N31l, N31l, N31l, N31l,
216 N31l, N31l, N31l, N31l, N31l, N31l, N31l, N31l,
217 N31l, N31l, N31l, N31l, N31l, N31l, N31l, N31l,
218 N31l, N31l, N31l, N31l, N31l, N31l, N31l, N31l,
219 N31l, A31l, N31l, C31l, N31l, N31l, N31l, G31l,
220 N31l, N31l, N31l, N31l, N31l, N31l, N31l, N31l,
221 N31l, N31l, N31l, N31l, T31l, T31l, N31l, N31l,
222 N31l, N31l, N31l, N31l, N31l, N31l, N31l, N31l,
223 N31l, A31l, N31l, C31l, N31l, N31l, N31l, G31l,
224 N31l, N31l, N31l, N31l, N31l, N31l, N31l, N31l,
225 N31l, N31l, N31l, N31l, T31l, T31l, N31l, N31l,
226 N31l, N31l, N31l, N31l, N31l, N31l, N31l, N31l,
227 N31l, N31l, N31l, N31l, N31l, N31l, N31l, N31l,
228 N31l, N31l, N31l, N31l, N31l, N31l, N31l, N31l,
229 N31l, N31l, N31l, N31l, N31l, N31l, N31l, N31l,
230 N31l, N31l, N31l, N31l, N31l, N31l, N31l, N31l,
231 N31l, N31l, N31l, N31l, N31l, N31l, N31l, N31l,
232 N31l, N31l, N31l, N31l, N31l, N31l, N31l, N31l,
233 N31l, N31l, N31l, N31l, N31l, N31l, N31l, N31l,
234 N31l, N31l, N31l, N31l, N31l, N31l, N31l, N31l,
235 N31l, N31l, N31l, N31l, N31l, N31l, N31l, N31l,
236 N31l, N31l, N31l, N31l, N31l, N31l, N31l, N31l,
237 N31l, N31l, N31l, N31l, N31l, N31l, N31l, N31l,
238 N31l, N31l, N31l, N31l, N31l, N31l, N31l, N31l,
239 N31l, N31l, N31l, N31l, N31l, N31l, N31l, N31l,
240 N31l, N31l, N31l, N31l, N31l, N31l, N31l, N31l,
241 N31l, N31l, N31l, N31l, N31l, N31l, N31l, N31l,
242 N31l, N31l, N31l, N31l, N31l, N31l, N31l, N31l
/**
 * Rotate a 64-bit word left by one bit.
 *
 * @param v Value to rotate.
 * @return v with every bit moved one position up and bit 63 wrapped to bit 0.
 */
inline uint64_t
rol1(const uint64_t v)
{
  return (v << 1) | (v >> 63);
}
/**
 * Rotate a 64-bit word right by one bit.
 *
 * @param v Value to rotate.
 * @return v with every bit moved one position down and bit 0 wrapped to bit 63.
 */
inline uint64_t
ror1(const uint64_t v)
{
  return (v >> 1) | (v << 63);
}
/**
 * Rotate a 31-bit value left by s positions.
 *
 * @param v Value to rotate; expected to fit in the low 31 bits.
 * @param s Rotation distance. Reduced modulo 31 first, so any distance is
 *          valid and the right-shift count stays within [1, 31].
 * @return The rotated value, masked to 31 bits.
 */
inline uint64_t
rol31(const uint64_t v, unsigned s)
{
  // Without this reduction, s > 31 makes (31 - s) wrap around as unsigned and
  // the shift below becomes undefined behaviour.
  s %= 31;
  return ((v << s) | (v >> (31 - s))) & 0x7FFFFFFF;
}
/**
 * Rotate a 33-bit value left by s positions.
 *
 * @param v Value to rotate; expected to fit in the low 33 bits.
 * @param s Rotation distance. Reduced modulo 33 first, so any distance is
 *          valid and the right-shift count stays within [1, 33].
 * @return The rotated value, masked to 33 bits.
 */
inline uint64_t
rol33(const uint64_t v, unsigned s)
{
  // Without this reduction, s > 33 makes (33 - s) wrap around as unsigned and
  // the shift below becomes undefined behaviour.
  s %= 33;
  return ((v << s) | (v >> (33 - s))) & 0x1FFFFFFFF;
}
/**
 * Exchange bit 0 and bit 33 of a 64-bit word, leaving all other bits alone.
 *
 * Applied right after rol1(), this turns the full 64-bit rotation into two
 * independent one-bit rotations: one of the low 33 bits and one of the upper
 * 31 bits.
 *
 * @param v Input word.
 * @return v with bits 0 and 33 swapped.
 */
inline uint64_t
swapbits033(const uint64_t v)
{
  // x is 1 only when the two bits differ; flipping both then swaps them.
  uint64_t x = (v ^ (v >> 33)) & 1;
  return v ^ (x | (x << 33));
}
/**
 * Exchange bit 32 and bit 63 of a 64-bit word, leaving all other bits alone.
 *
 * Applied right after ror1(), this turns the full 64-bit rotation into two
 * independent one-bit right rotations: one of the low 33 bits and one of the
 * upper 31 bits.
 *
 * @param v Input word.
 * @return v with bits 32 and 63 swapped.
 */
inline uint64_t
swapbits3263(const uint64_t v)
{
  // x is 1 only when the two bits differ; flipping both then swaps them.
  uint64_t x = ((v >> 32) ^ (v >> 63)) & 1;
  return v ^ ((x << 32) | (x << 63));
}
// Forward hash of a k-mer computed from scratch: the loop visits
// kmer_seq[0..k-1] left to right and folds each character's seed into h_val.
// NOTE(review): the declaration of h_val and the return are not visible in
// this listing.
293 NTF64(
const char* kmer_seq,
const unsigned k)
296 for (
unsigned i = 0; i < k; i++) {
// Swap bits 0 and 33 of the running value before mixing in the next seed.
298 h_val = swapbits033(h_val);
// The cast to unsigned char keeps the table index within 0..255.
299 h_val ^= seed_tab[(
unsigned char)kmer_seq[i]];
// Reverse hash of a k-mer computed from scratch: the loop visits the
// characters from kmer_seq[k-1] down to kmer_seq[0] and, through the cp_off
// mask, folds in the seed of each character's complement.
// NOTE(review): the declaration of h_val and the return are not visible in
// this listing.
306 NTR64(
const char* kmer_seq,
const unsigned k)
309 for (
unsigned i = 0; i < k; i++) {
311 h_val = swapbits033(h_val);
// The cast binds tighter than '&', so the index is (unsigned char) & cp_off.
312 h_val ^= seed_tab[(
unsigned char)kmer_seq[k - 1 - i] & cp_off];
// Rolling forward update: derives a new forward hash from fh_val using the
// outgoing (char_out) and incoming (char_in) characters.
319 NTF64(
const uint64_t fh_val,
321 const unsigned char char_out,
322 const unsigned char char_in)
// Advance the previous hash one position: rotate left, then swap bits 0/33.
324 uint64_t h_val = rol1(fh_val);
325 h_val = swapbits033(h_val);
// Mix in the seed of the incoming character.
326 h_val ^= seed_tab[char_in];
// Table entry for the outgoing character at offset k, with the 31-bit and
// 33-bit parts OR-ed together. NOTE(review): the statement that consumes this
// expression is not visible in this listing.
328 (ms_tab_31l[char_out][k % 31] | ms_tab_33r[char_out][k % 33]);
// Rolling reverse update: derives a new reverse hash from rh_val using the
// complements (via cp_off) of the incoming and outgoing characters.
334 NTR64(
const uint64_t rh_val,
336 const unsigned char char_out,
337 const unsigned char char_in)
// XOR in the table entry of the incoming character's complement at offset k.
339 uint64_t h_val = rh_val ^ (ms_tab_31l[char_in & cp_off][k % 31] |
340 ms_tab_33r[char_in & cp_off][k % 33]);
// XOR with the seed of the outgoing character's complement.
341 h_val ^= seed_tab[char_out & cp_off];
// Swap bits 32 and 63. NOTE(review): a statement between the two lines here
// is not visible in this listing.
343 h_val = swapbits3263(h_val);
// Canonical hash of a k-mer: computes the forward and reverse hashes from
// scratch and returns the smaller of the two.
349 NTC64(
const char* kmer_seq,
const unsigned k)
351 uint64_t fh_val = 0, rh_val = 0;
352 fh_val = NTF64(kmer_seq, k);
353 rh_val = NTR64(kmer_seq, k);
// Ties return fh_val.
354 return (rh_val < fh_val) ? rh_val : fh_val;
// Canonical hash of a k-mer that also stores the forward and reverse hashes
// in fh_val and rh_val. NOTE(review): the declarations of k, fh_val and
// rh_val are not visible in this listing; presumably they are parameters.
359 NTC64(
const char* kmer_seq,
364 fh_val = NTF64(kmer_seq, k);
365 rh_val = NTR64(kmer_seq, k);
// Return the smaller of the two hashes; ties return fh_val.
366 return (rh_val < fh_val) ? rh_val : fh_val;
// Rolling canonical update: rolls fh_val and rh_val in place with the
// outgoing and incoming characters and returns the smaller result.
371 NTC64(
const unsigned char char_out,
372 const unsigned char char_in,
377 fh_val = NTF64(fh_val, k, char_out, char_in);
378 rh_val = NTR64(rh_val, k, char_out, char_in);
379 return (rh_val < fh_val) ? rh_val : fh_val;
// Forward-hash update with the same structure as the rolling NTR64 overload,
// but indexing the tables directly (no cp_off mask).
// NOTE(review): presumably this rolls the window in the opposite direction;
// confirm against callers.
384 NTF64L(
const uint64_t rh_val,
386 const unsigned char char_out,
387 const unsigned char char_in)
// XOR in the table entry of the incoming character at offset k.
389 uint64_t h_val = rh_val ^ (ms_tab_31l[char_in][k % 31] |
390 ms_tab_33r[char_in][k % 33]);
// XOR with the seed of the outgoing character.
391 h_val ^= seed_tab[char_out];
// NOTE(review): a statement between the two lines here is not visible.
393 h_val = swapbits3263(h_val);
// Reverse-hash update with the same structure as the rolling NTF64 overload,
// but with every table index masked by cp_off (complement lookup).
399 NTR64L(
const uint64_t fh_val,
401 const unsigned char char_out,
402 const unsigned char char_in)
// Rotate left by one bit, then swap bits 0 and 33.
404 uint64_t h_val = rol1(fh_val);
405 h_val = swapbits033(h_val);
// Mix in the seed of the incoming character's complement.
406 h_val ^= seed_tab[char_in & cp_off];
// XOR with the table entry of the outgoing character's complement at offset k.
407 h_val ^= (ms_tab_31l[char_out & cp_off][k % 31] |
408 ms_tab_33r[char_out & cp_off][k % 33]);
// Canonical update built on NTF64L/NTR64L: updates fh_val and rh_val in
// place and returns the smaller result (ties return fh_val).
414 NTC64L(
const unsigned char char_out,
415 const unsigned char char_in,
420 fh_val = NTF64L(fh_val, k, char_out, char_in);
421 rh_val = NTR64L(rh_val, k, char_out, char_in);
422 return (rh_val < fh_val) ? rh_val : fh_val;
// Seeded forward hash: computes the base forward hash of the k-mer and then
// scrambles it with a multiplier derived from seed and k.
427 NTF64(
const char* kmer_seq,
const unsigned k,
const unsigned seed)
429 uint64_t h_val = NTF64(kmer_seq, k);
// '*' binds tighter than '^': the multiplier is seed ^ (k * multiseed).
433 h_val *= seed ^ k * multiseed;
// Fold the high bits back into the low bits.
434 h_val ^= h_val >> multishift;
// Seeded canonical hash: computes the base canonical hash of the k-mer and
// then scrambles it with a multiplier derived from seed and k.
440 NTC64(
const char* kmer_seq,
const unsigned k,
const unsigned seed)
442 uint64_t h_val = NTC64(kmer_seq, k);
// '*' binds tighter than '^': the multiplier is seed ^ (k * multiseed).
446 h_val *= seed ^ k * multiseed;
// Fold the high bits back into the low bits.
447 h_val ^= h_val >> multishift;
// Multi-hash over the forward strand: computes the base forward hash and
// derives one extra value for each index i in 1..m-1.
// NOTE(review): the stores into h_val are not visible in this listing;
// presumably h_val[0] receives the base hash and h_val[i] each derived value.
453 NTM64(
const char* kmer_seq,
const unsigned k,
const unsigned m, uint64_t* h_val)
455 uint64_t b_val = 0, t_val = 0;
456 b_val = NTF64(kmer_seq, k);
458 for (
unsigned i = 1; i < m; i++) {
// The multiplier is i ^ (k * multiseed), since '*' binds tighter than '^'.
459 t_val = b_val * (i ^ k * multiseed);
460 t_val ^= t_val >> multishift;
// Derives the i-th extra hash from a base hash h_val, using the same
// multiply-then-shift-xor step as the multi-hash loops.
467 NTE64(
const uint64_t h_val,
const unsigned k,
const unsigned i)
469 uint64_t t_val = h_val;
// The multiplier is i ^ (k * multiseed), since '*' binds tighter than '^'.
470 t_val *= (i ^ k * multiseed);
471 t_val ^= t_val >> multishift;
// Rolling multi-hash over the forward strand: rolls the base hash held in
// h_val[0] with char_out/char_in and derives one extra value for each index
// i in 1..m-1. NOTE(review): the stores back into h_val are not visible here.
477 NTM64(
const unsigned char char_out,
478 const unsigned char char_in,
483 uint64_t b_val = 0, t_val = 0;
484 b_val = NTF64(h_val[0], k, char_out, char_in);
486 for (
unsigned i = 1; i < m; i++) {
// The multiplier is i ^ (k * multiseed), since '*' binds tighter than '^'.
487 t_val = b_val * (i ^ k * multiseed);
488 t_val ^= t_val >> multishift;
// Canonical multi-hash computed from scratch: takes the canonical base hash
// of the k-mer and derives one extra value for each index i in 1..m-1.
// NOTE(review): the stores into the output array are not visible here.
495 NTMC64(
const char* kmer_seq,
500 uint64_t b_val = 0, t_val = 0;
501 b_val = NTC64(kmer_seq, k);
503 for (
unsigned i = 1; i < m; i++) {
// The multiplier is i ^ (k * multiseed), since '*' binds tighter than '^'.
504 t_val = b_val * (i ^ k * multiseed);
505 t_val ^= t_val >> multishift;
// Canonical multi-hash computed from scratch that also passes fh_val and
// rh_val to NTC64, which stores the forward and reverse hashes in them.
// NOTE(review): the stores into the output array are not visible here.
512 NTMC64(
const char* kmer_seq,
519 uint64_t b_val = 0, t_val = 0;
520 b_val = NTC64(kmer_seq, k, fh_val, rh_val);
522 for (
unsigned i = 1; i < m; i++) {
// The multiplier is i ^ (k * multiseed), since '*' binds tighter than '^'.
523 t_val = b_val * (i ^ k * multiseed);
524 t_val ^= t_val >> multishift;
// Rolling canonical multi-hash: rolls fh_val and rh_val with
// char_out/char_in through NTC64 and derives one extra value for each index
// i in 1..m-1. NOTE(review): the stores into the output array are not
// visible here.
531 NTMC64(
const unsigned char char_out,
532 const unsigned char char_in,
539 uint64_t b_val = 0, t_val = 0;
540 b_val = NTC64(char_out, char_in, k, fh_val, rh_val);
542 for (
unsigned i = 1; i < m; i++) {
// The multiplier is i ^ (k * multiseed), since '*' binds tighter than '^'.
543 t_val = b_val * (i ^ k * multiseed);
544 t_val ^= t_val >> multishift;
// Canonical hash of a k-mer with detection of unrecognised characters.
// Writes the smaller of the forward and reverse hashes to h_val.
// NOTE(review): locN presumably reports the position of an unrecognised
// character; its assignment and the return statements are not visible here.
555 NTC64(
const char* kmer_seq,
const unsigned k, uint64_t& h_val,
unsigned& locN)
559 uint64_t fh_val = 0, rh_val = 0;
// Signed index so the loop can count down from k-1 and stop after 0.
560 for (
int i =
int(k - 1); i >= 0; i--) {
// A seedN entry means the character is not a recognised base.
561 if (seed_tab[(
unsigned char)kmer_seq[i]] == seedN) {
// Forward strand: as i counts down, k-1-i walks the k-mer left to right.
565 fh_val = rol1(fh_val);
566 fh_val = swapbits033(fh_val);
567 fh_val ^= seed_tab[(
unsigned char)kmer_seq[k - 1 - i]];
// Reverse strand: walks right to left, using each character's complement.
569 rh_val = rol1(rh_val);
570 rh_val = swapbits033(rh_val);
571 rh_val ^= seed_tab[(
unsigned char)kmer_seq[i] & cp_off];
573 h_val = (rh_val < fh_val) ? rh_val : fh_val;
// Canonical multi-hash with detection of unrecognised characters: builds the
// forward and reverse hashes in one pass, takes the smaller as the base value
// and derives one extra value for each index i in 1..m-1.
// NOTE(review): the body of the seedN check and the stores into the output
// array are not visible in this listing.
579 NTMC64(
const char* kmer_seq,
585 uint64_t b_val = 0, t_val = 0, fh_val = 0, rh_val = 0;
// Signed index so the loop can count down from k-1 and stop after 0.
587 for (
int i =
int(k - 1); i >= 0; i--) {
// A seedN entry means the character is not a recognised base.
588 if (seed_tab[(
unsigned char)kmer_seq[i]] == seedN) {
// Forward strand: as i counts down, k-1-i walks the k-mer left to right.
592 fh_val = rol1(fh_val);
593 fh_val = swapbits033(fh_val);
594 fh_val ^= seed_tab[(
unsigned char)kmer_seq[k - 1 - i]];
// Reverse strand: walks right to left, using each character's complement.
596 rh_val = rol1(rh_val);
597 rh_val = swapbits033(rh_val);
598 rh_val ^= seed_tab[(
unsigned char)kmer_seq[i] & cp_off];
600 b_val = (rh_val < fh_val) ? rh_val : fh_val;
602 for (
unsigned i = 1; i < m; i++) {
// The multiplier is i ^ (k * multiseed), since '*' binds tighter than '^'.
603 t_val = b_val * (i ^ k * multiseed);
604 t_val ^= t_val >> multishift;
// Canonical hash with detection of unrecognised characters that also exposes
// the forward and reverse hashes: h_val, fh_val and rh_val are all reset to
// zero before the k-mer is scanned.
// NOTE(review): the body of the seedN check and the return statements are not
// visible in this listing.
612 NTC64(
const char* kmer_seq,
619 h_val = fh_val = rh_val = 0;
// Signed index so the loop can count down from k-1 and stop after 0.
621 for (
int i =
int(k - 1); i >= 0; i--) {
// A seedN entry means the character is not a recognised base.
622 if (seed_tab[(
unsigned char)kmer_seq[i]] == seedN) {
// Forward strand: as i counts down, k-1-i walks the k-mer left to right.
626 fh_val = rol1(fh_val);
627 fh_val = swapbits033(fh_val);
628 fh_val ^= seed_tab[(
unsigned char)kmer_seq[k - 1 - i]];
// Reverse strand: walks right to left, using each character's complement.
630 rh_val = rol1(rh_val);
631 rh_val = swapbits033(rh_val);
632 rh_val ^= seed_tab[(
unsigned char)kmer_seq[i] & cp_off];
634 h_val = (rh_val < fh_val) ? rh_val : fh_val;
// Canonical multi-hash with detection of unrecognised characters that updates
// fh_val and rh_val (declared outside the visible lines), takes the smaller
// as the base value and derives one extra value for each index i in 1..m-1.
// NOTE(review): the body of the seedN check and the stores into the output
// array are not visible in this listing.
640 NTMC64(
const char* kmer_seq,
649 uint64_t b_val = 0, t_val = 0;
// Signed index so the loop can count down from k-1 and stop after 0.
651 for (
int i =
int(k - 1); i >= 0; i--) {
// A seedN entry means the character is not a recognised base.
652 if (seed_tab[(
unsigned char)kmer_seq[i]] == seedN) {
// Forward strand: as i counts down, k-1-i walks the k-mer left to right.
656 fh_val = rol1(fh_val);
657 fh_val = swapbits033(fh_val);
658 fh_val ^= seed_tab[(
unsigned char)kmer_seq[k - 1 - i]];
// Reverse strand: walks right to left, using each character's complement.
660 rh_val = rol1(rh_val);
661 rh_val = swapbits033(rh_val);
662 rh_val ^= seed_tab[(
unsigned char)kmer_seq[i] & cp_off];
664 b_val = (rh_val < fh_val) ? rh_val : fh_val;
666 for (
unsigned i = 1; i < m; i++) {
// The multiplier is i ^ (k * multiseed), since '*' binds tighter than '^'.
667 t_val = b_val * (i ^ k * multiseed);
668 t_val ^= t_val >> multishift;
// Same as the preceding NTMC64 overload, and additionally records in hStn
// whether the reverse hash was the smaller one.
// NOTE(review): the body of the seedN check and the stores into the output
// array are not visible in this listing.
676 NTMC64(
const char* kmer_seq,
686 uint64_t b_val = 0, t_val = 0;
// Signed index so the loop can count down from k-1 and stop after 0.
688 for (
int i =
int(k - 1); i >= 0; i--) {
// A seedN entry means the character is not a recognised base.
689 if (seed_tab[(
unsigned char)kmer_seq[i]] == seedN) {
// Forward strand: as i counts down, k-1-i walks the k-mer left to right.
693 fh_val = rol1(fh_val);
694 fh_val = swapbits033(fh_val);
695 fh_val ^= seed_tab[(
unsigned char)kmer_seq[k - 1 - i]];
// Reverse strand: walks right to left, using each character's complement.
697 rh_val = rol1(rh_val);
698 rh_val = swapbits033(rh_val);
699 rh_val ^= seed_tab[(
unsigned char)kmer_seq[i] & cp_off];
// hStn is true when the reverse hash is chosen as the canonical value.
701 hStn = rh_val < fh_val;
702 b_val = hStn ? rh_val : fh_val;
704 for (
unsigned i = 1; i < m; i++) {
// The multiplier is i ^ (k * multiseed), since '*' binds tighter than '^'.
705 t_val = b_val * (i ^ k * multiseed);
706 t_val ^= t_val >> multishift;
// Rolling canonical multi-hash that also records in hStn whether the reverse
// hash is the smaller one after the roll.
// NOTE(review): the stores into the output array are not visible here.
714 NTMC64(
const unsigned char char_out,
715 const unsigned char char_in,
723 uint64_t b_val = 0, t_val = 0;
// NTC64 rolls fh_val and rh_val in place, so the comparison below sees the
// updated values.
724 b_val = NTC64(char_out, char_in, k, fh_val, rh_val);
725 hStn = rh_val < fh_val;
727 for (
unsigned i = 1; i < m; i++) {
// The multiplier is i ^ (k * multiseed), since '*' binds tighter than '^'.
728 t_val = b_val * (i ^ k * multiseed);
729 t_val ^= t_val >> multishift;
// Derives a masked canonical hash from the k-mer hashes fk_val and rk_val:
// for every position i where seed_seq[i] is not '1', a table entry for
// kmer_seq[i] is combined into local copies of the two hashes, and the
// smaller copy is returned.
// NOTE(review): the statements that apply the two table expressions are not
// visible in this listing; presumably they XOR into fs_val and rs_val.
736 maskHash(uint64_t& fk_val,
738 const char* seed_seq,
739 const char* kmer_seq,
742 uint64_t fs_val = fk_val, rs_val = rk_val;
743 for (
unsigned i = 0; i < k; i++) {
744 if (seed_seq[i] !=
'1') {
// Forward side: entry for the character itself at offset k-1-i.
746 (ms_tab_31l[(
unsigned char)kmer_seq[i]][(k - 1 - i) % 31] |
747 ms_tab_33r[(
unsigned char)kmer_seq[i]][(k - 1 - i) % 33]);
// Reverse side: entry for the character's complement at offset i.
749 (ms_tab_31l[(
unsigned char)kmer_seq[i] & cp_off][i % 31] |
750 ms_tab_33r[(
unsigned char)kmer_seq[i] & cp_off][i % 33]);
753 return (rs_val < fs_val) ? rs_val : fs_val;
// Spaced-seed forward hash computed from scratch over kmer_seq[0..k-1].
// NOTE(review): how `seed` gates which positions contribute is not visible in
// this listing.
758 NTS64(
const char* kmer_seq,
759 const std::vector<bool>& seed,
765 for (
unsigned i = 0; i < k; i++) {
// Swap bits 0 and 33 of the running value before mixing in the next seed.
767 h_val = swapbits033(h_val);
769 h_val ^= seed_tab[(
unsigned char)kmer_seq[i]];
// Rolling spaced-seed hash: first rolls the plain forward hash h_val, then
// builds sVal from a copy of it by XOR-ing in table entries of k-mer
// characters.
// NOTE(review): the condition selecting which positions are XOR-ed is not
// visible in this listing.
779 NTS64(
const char* kmer_seq,
780 const std::vector<bool>& seed,
781 const unsigned char char_out,
782 const unsigned char char_in,
786 h_val = NTF64(h_val, k, char_out, char_in);
787 uint64_t sVal = h_val;
788 for (
unsigned i = 0; i < k; i++) {
// NOTE(review): the table offset is k % 31 / k % 33 for every i (it does not
// depend on the position) - confirm this is intended.
790 sVal ^= (ms_tab_31l[(
unsigned char)kmer_seq[i]][k % 31] |
791 ms_tab_33r[(
unsigned char)kmer_seq[i]][k % 33]);
// Multi spaced-seed canonical hash computed from scratch. A first pass builds
// the forward and reverse k-mer hashes; then, for each seed j, the table
// entries of the positions listed in seed_seq[j] are XOR-ed into copies of
// both hashes, and the smaller copy is stored in h_val[j].
// NOTE(review): the body of the seedN check and the reverse-side table
// offsets are not visible in this listing.
799 NTMS64(
const char* kmer_seq,
800 const std::vector<std::vector<unsigned>>& seed_seq,
// Signed index so the loop can count down from k-1 and stop after 0.
811 for (
int i =
int(k - 1); i >= 0; i--) {
// A seedN entry means the character is not a recognised base.
812 if (seed_tab[(
unsigned char)kmer_seq[i]] == seedN) {
// Forward strand: as i counts down, k-1-i walks the k-mer left to right.
816 fh_val = rol1(fh_val);
817 fh_val = swapbits033(fh_val);
818 fh_val ^= seed_tab[(
unsigned char)kmer_seq[k - 1 - i]];
// Reverse strand: walks right to left, using each character's complement.
820 rh_val = rol1(rh_val);
821 rh_val = swapbits033(rh_val);
822 rh_val ^= seed_tab[(
unsigned char)kmer_seq[i] & cp_off];
825 for (
unsigned j = 0; j < m; j++) {
// Start every seed from the unmasked k-mer hashes.
826 uint64_t fs_val = fh_val, rs_val = rh_val;
827 for (
const auto& seed_pos : seed_seq[j]) {
// Forward side: entry for the character at seed_pos, offset k-1-seed_pos.
828 fs_val ^= (ms_tab_31l[(
unsigned char)kmer_seq[seed_pos]]
829 [(k - 1 - seed_pos) % 31] |
830 ms_tab_33r[(
unsigned char)kmer_seq[seed_pos]]
831 [(k - 1 - seed_pos) % 33]);
// Reverse side: entry for the complement of the character at seed_pos.
832 rs_val ^= (ms_tab_31l[(
unsigned char)kmer_seq[seed_pos] & cp_off]
834 ms_tab_33r[(
unsigned char)kmer_seq[seed_pos] & cp_off]
// hStn[j] is true when the reverse value is chosen for seed j.
837 hStn[j] = rs_val < fs_val;
838 h_val[j] = hStn[j] ? rs_val : fs_val;
// Rolling multi spaced-seed canonical hash: rolls the forward and reverse
// k-mer hashes with char_out/char_in, then for each seed j XORs the table
// entries of the positions in seed_seq[j] into copies of both hashes and
// stores the smaller copy in h_val[j].
// NOTE(review): the reverse-side table offsets are not visible in this
// listing.
845 NTMS64(
const char* kmer_seq,
846 const std::vector<std::vector<unsigned>>& seed_seq,
847 const unsigned char char_out,
848 const unsigned char char_in,
856 fh_val = NTF64(fh_val, k, char_out, char_in);
857 rh_val = NTR64(rh_val, k, char_out, char_in);
858 for (
unsigned j = 0; j < m; j++) {
// Start every seed from the unmasked k-mer hashes.
859 uint64_t fs_val = fh_val, rs_val = rh_val;
860 for (
const auto& seed_pos : seed_seq[j]) {
// Forward side: entry for the character at seed_pos, offset k-1-seed_pos.
861 fs_val ^= (ms_tab_31l[(
unsigned char)kmer_seq[seed_pos]]
862 [(k - 1 - seed_pos) % 31] |
863 ms_tab_33r[(
unsigned char)kmer_seq[seed_pos]]
864 [(k - 1 - seed_pos) % 33]);
// Reverse side: entry for the complement of the character at seed_pos.
865 rs_val ^= (ms_tab_31l[(
unsigned char)kmer_seq[seed_pos] & cp_off]
867 ms_tab_33r[(
unsigned char)kmer_seq[seed_pos] & cp_off]
// hStn[j] is true when the reverse value is chosen for seed j.
871 hStn[j] = rs_val < fs_val;
872 h_val[j] = hStn[j] ? rs_val : fs_val;
// Multi spaced-seed, multi-hash canonical hashing computed from scratch.
// The output is laid out as m groups of m2 values: h_val[j * m2] is the
// canonical masked hash for seed j, and h_val[j * m2 + j2] for j2 in
// 1..m2-1 are extra values derived from it.
// NOTE(review): the body of the seedN check and the reverse-side table
// offsets are not visible in this listing.
878 NTMSM64(
const char* kmer_seq,
879 const std::vector<std::vector<unsigned>>& seed_seq,
// Signed index so the loop can count down from k-1 and stop after 0.
890 for (
int i =
int(k - 1); i >= 0; i--) {
// A seedN entry means the character is not a recognised base.
891 if (seed_tab[(
unsigned char)kmer_seq[i]] == seedN) {
// Forward strand: as i counts down, k-1-i walks the k-mer left to right.
895 fh_val = rol1(fh_val);
896 fh_val = swapbits033(fh_val);
897 fh_val ^= seed_tab[(
unsigned char)kmer_seq[k - 1 - i]];
// Reverse strand: walks right to left, using each character's complement.
899 rh_val = rol1(rh_val);
900 rh_val = swapbits033(rh_val);
901 rh_val ^= seed_tab[(
unsigned char)kmer_seq[i] & cp_off];
904 for (
unsigned j = 0; j < m; j++) {
// Start every seed from the unmasked k-mer hashes.
905 uint64_t fs_val = fh_val, rs_val = rh_val;
906 for (
const auto& seed_pos : seed_seq[j]) {
// Forward side: entry for the character at seed_pos, offset k-1-seed_pos.
907 fs_val ^= (ms_tab_31l[(
unsigned char)kmer_seq[seed_pos]]
908 [(k - 1 - seed_pos) % 31] |
909 ms_tab_33r[(
unsigned char)kmer_seq[seed_pos]]
910 [(k - 1 - seed_pos) % 33]);
// Reverse side: entry for the complement of the character at seed_pos.
911 rs_val ^= (ms_tab_31l[(
unsigned char)kmer_seq[seed_pos] & cp_off]
913 ms_tab_33r[(
unsigned char)kmer_seq[seed_pos] & cp_off]
// First slot of group j holds the canonical masked hash.
916 h_val[j * m2] = rs_val < fs_val ? rs_val : fs_val;
917 for (
unsigned j2 = 1; j2 < m2; j2++) {
// The multiplier is j2 ^ (k * multiseed), since '*' binds tighter than '^'.
918 uint64_t t_val = h_val[j * m2] * (j2 ^ k * multiseed);
919 t_val ^= t_val >> multishift;
920 h_val[j * m2 + j2] = t_val;
// Rolling multi spaced-seed, multi-hash canonical hashing: rolls the forward
// and reverse k-mer hashes with char_out/char_in, then fills h_val with m
// groups of m2 values. h_val[j * m2] is the canonical masked hash for seed j
// and the following m2-1 slots are extra values derived from it.
// NOTE(review): the reverse-side table offsets are not visible in this
// listing.
928 NTMSM64(
const char* kmer_seq,
929 const std::vector<std::vector<unsigned>>& seed_seq,
930 const unsigned char char_out,
931 const unsigned char char_in,
939 fh_val = NTF64(fh_val, k, char_out, char_in);
940 rh_val = NTR64(rh_val, k, char_out, char_in);
941 for (
unsigned j = 0; j < m; j++) {
// Start every seed from the unmasked k-mer hashes.
942 uint64_t fs_val = fh_val, rs_val = rh_val;
943 for (
const auto& seed_pos : seed_seq[j]) {
// Forward side: entry for the character at seed_pos, offset k-1-seed_pos.
944 fs_val ^= (ms_tab_31l[(
unsigned char)kmer_seq[seed_pos]]
945 [(k - 1 - seed_pos) % 31] |
946 ms_tab_33r[(
unsigned char)kmer_seq[seed_pos]]
947 [(k - 1 - seed_pos) % 33]);
// Reverse side: entry for the complement of the character at seed_pos.
948 rs_val ^= (ms_tab_31l[(
unsigned char)kmer_seq[seed_pos] & cp_off]
950 ms_tab_33r[(
unsigned char)kmer_seq[seed_pos] & cp_off]
// First slot of group j holds the canonical masked hash.
953 h_val[j * m2] = rs_val < fs_val ? rs_val : fs_val;
954 for (
unsigned j2 = 1; j2 < m2; j2++) {
// The multiplier is j2 ^ (k * multiseed), since '*' binds tighter than '^'.
955 uint64_t t_val = h_val[j * m2] * (j2 ^ k * multiseed);
956 t_val ^= t_val >> multishift;
957 h_val[j * m2 + j2] = t_val;
// A spaced seed stored as a list of positions; these are the positions the
// spaced-seed hashing routines iterate over as seed_pos.
964 using SpacedSeed = std::vector<unsigned>;
// Converts seed strings into position lists, one SpacedSeed per input string.
965 static std::vector<SpacedSeed>
966 parse_seeds(
const std::vector<std::string>& seed_strings);
// Constructs a hasher over seq_len characters starting at seq, for k-mers of
// length k; hash_num sizes the internal hashes_vector.
987 NtHash(
const char* seq,
size_t seq_len,
unsigned k,
unsigned hash_num);
// Same as above for a std::string; delegates using seq.c_str() and seq.size().
995 NtHash(
const std::string& seq,
unsigned k,
unsigned hash_num);
// Pointer to the contiguous storage of hashes_vector.
1003 const uint64_t* hashes()
const;
// Accessors for the current position, the k-mer length and the hash count.
1005 size_t get_pos()
const {
return pos; }
1006 unsigned get_k()
const {
return k; }
1007 unsigned get_hash_num()
const {
return hash_num; }
// Length of the input sequence, fixed at construction.
1014 const size_t seq_len;
// Number of entries in hashes_vector, fixed at construction.
1016 const unsigned hash_num;
// Storage exposed by hashes().
1018 std::vector<uint64_t> hashes_vector;
// Forward and reverse hash state, both starting at zero.
1019 uint64_t forward_hash = 0;
1020 uint64_t reverse_hash = 0;
// Tails of four constructor declarations: the seed set is given either as
// parsed position lists (SpacedSeed) or as strings, followed by the number of
// hashes to produce per seed.
// NOTE(review): the leading parameters of each declaration are not visible in
// this listing.
1030 const std::vector<SpacedSeed>& seeds,
1031 unsigned hash_num_per_seed);
1034 const std::vector<SpacedSeed>& seeds,
1035 unsigned hash_num_per_seed);
1039 const std::vector<std::string>& seeds,
1040 unsigned hash_num_per_seed);
1043 const std::vector<std::string>& seeds,
1044 unsigned hash_num_per_seed);
// Accessor for the number of hashes produced per seed.
1046 unsigned get_hash_num_per_seed()
const {
return hash_num_per_seed; }
// Number of hashes per seed, fixed at construction.
1053 const unsigned hash_num_per_seed;
// The seed set as position lists.
1054 std::vector<SpacedSeed> seeds;
// Tail of the pointer/length constructor: stores hash_num and sizes
// hashes_vector to hold that many values.
// NOTE(review): the start of this constructor is not visible in this listing.
1064 , hash_num(hash_num)
1066 hashes_vector.resize(hash_num);
// std::string constructor: delegates to the pointer/length constructor.
1070 :
NtHash(seq.c_str(), seq.size(), k, hash_num)
// Pointer/length constructor taking pre-parsed seeds. The base NtHash is
// asked for seeds.size() * hash_num_per_seed hashes in total.
// NOTE(review): the initialisation of the seeds member is not visible in this
// listing.
1073 inline SeedNtHash::SeedNtHash(
const char* seq,
1076 const std::vector<SpacedSeed>& seeds,
1077 unsigned hash_num_per_seed)
1078 :
NtHash(seq, seq_len, k, seeds.size() * hash_num_per_seed)
1079 , hash_num_per_seed(hash_num_per_seed)
// std::string constructor taking pre-parsed seeds. The base NtHash is asked
// for seeds.size() * hash_num_per_seed hashes in total.
// NOTE(review): the initialisation of the seeds member is not visible in this
// listing.
1083 inline SeedNtHash::SeedNtHash(
const std::string& seq,
1085 const std::vector<SpacedSeed>& seeds,
1086 unsigned hash_num_per_seed)
1087 : NtHash(seq, k, seeds.size() * hash_num_per_seed)
1088 , hash_num_per_seed(hash_num_per_seed)
// Pointer/length constructor taking seeds as strings. The strings are
// converted with parse_seeds(), and the base NtHash is asked for
// seeds.size() * hash_num_per_seed hashes in total.
1092 inline SeedNtHash::SeedNtHash(
const char* seq,
1095 const std::vector<std::string>& seeds,
1096 unsigned hash_num_per_seed)
1097 : NtHash(seq, seq_len, k, seeds.size() * hash_num_per_seed)
1098 , hash_num_per_seed(hash_num_per_seed)
// Inside the parentheses `seeds` is the string parameter; the member being
// initialised has the same name.
1099 , seeds(parse_seeds(seeds))
// std::string constructor taking seeds as strings. The strings are converted
// with parse_seeds(), and the base NtHash is asked for
// seeds.size() * hash_num_per_seed hashes in total.
1102 inline SeedNtHash::SeedNtHash(
const std::string& seq,
1104 const std::vector<std::string>& seeds,
1105 unsigned hash_num_per_seed)
1106 : NtHash(seq, k, seeds.size() * hash_num_per_seed)
1107 , hash_num_per_seed(hash_num_per_seed)
// Inside the parentheses `seeds` is the string parameter; the member being
// initialised has the same name.
1108 , seeds(parse_seeds(seeds))
// Builds one SpacedSeed per input string by scanning its characters and
// collecting positions, then returns the whole set.
// NOTE(review): the condition that decides which positions are collected, and
// how pos advances, are not visible in this listing.
1111 static std::vector<SpacedSeed>
1112 parse_seeds(
const std::vector<std::string>& seed_strings)
1114 std::vector<SpacedSeed> seed_set;
1115 for (
const auto& seed_string : seed_strings) {
1118 for (
const auto& c : seed_string) {
1120 seed.push_back(pos);
1124 seed_set.push_back(seed);
// Generates CLASS::init(). The generated function sets pos to
// std::numeric_limits<std::size_t>::max() when k exceeds seq_len, loops while
// pos is still a valid k-mer start and NTHASH_CALL evaluates to false, and
// sets pos to the same sentinel if the loop ran past the last valid start.
// NOTE(review): the loop body and the return statements are not visible in
// this listing.
1130 #define NT_HASH_INIT(CLASS, NTHASH_CALL) \
1131 inline bool CLASS::init() \
1133 if (k > seq_len) { \
1134 pos = std::numeric_limits<std::size_t>::max(); \
1137 unsigned posN = 0; \
1138 while ((pos < seq_len - k + 1) && !(NTHASH_CALL)) { \
1141 if (pos > seq_len - k) { \
1142 pos = std::numeric_limits<std::size_t>::max(); \
// Generates CLASS::roll(). The generated function checks whether pos is past
// the last valid k-mer start, and whether the character at seq[pos + k - 1]
// maps to seedN (not a recognised base).
// NOTE(review): the actions taken in each case, and the use of NTHASH_CALL,
// are not visible in this listing.
1150 #define NT_HASH_ROLL(CLASS, NTHASH_CALL) \
1151 inline bool CLASS::roll() \
1156 if (pos > seq_len - k) { \
1159 if (seed_tab[(unsigned char)(seq[pos + k - 1])] == seedN) { \
// Instantiates init() and roll() for NtHash and SeedNtHash. Each hashing call
// passed to the macros ends by writing into hashes_vector.data().
// NOTE(review): most arguments of these calls are not visible in this listing.
1168 NT_HASH_INIT(NtHash,
1175 hashes_vector.data()))
1176 NT_HASH_ROLL(NtHash,
1177 NTMC64(seq[pos - 1],
1183 hashes_vector.data()))
1185 NT_HASH_INIT(SeedNtHash,
1194 hashes_vector.data()))
1195 NT_HASH_ROLL(SeedNtHash,
1205 hashes_vector.data()))
// Returns a pointer to the first element of hashes_vector. The pointer refers
// to the vector's own storage, so it is only usable while that storage is
// alive and not reallocated.
1210 inline const uint64_t*
1211 NtHash::hashes()
const
1213 return hashes_vector.data();