The DNA sequence is composed of a series of nucleotides abbreviated as 'A'
, 'C'
, 'G'
, and 'T'
.
- For example,
"ACGAATTCCG"
is a DNA sequence.
When studying DNA, it is useful to identify repeated sequences within the DNA.
Given a string s
that represents a DNA sequence, return all the 10
-letter-long sequences (substrings) that occur more than once in a DNA molecule. You may return the answer in any order.
Example 1:
Input: s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT" Output: ["AAAAACCCCC","CCCCCAAAAA"]
Example 2:
Input: s = "AAAAAAAAAAAAA" Output: ["AAAAAAAAAA"]
Constraints:
1 <= s.length <= 105
s[i]
is either'A'
,'C'
,'G'
, or'T'
.
Companies: LinkedIn, Amazon, Google, Bloomberg
Related Topics:
Hash Table, String, Bit Manipulation, Sliding Window, Rolling Hash, Hash Function
Note that this implementation is unsafe because it doesn't check whether two strings are the same when their hashes are the same.
// OJ: https://leetcode.com/problems/repeated-dna-sequences
// Author: github.com/lzl124631x
// Time: O(N)
// Space: O(4^10)
class Solution {
public:
vector<string> findRepeatedDnaSequences(string s) {
unordered_map<unsigned long long, int> cnt;
vector<string> ans;
unsigned long long h = 0, p = 1, d = 1099511628211;
for (int i = 0, N = s.size(); i < N; ++i) {
if (i < 10) p *= d;
h = h * d + s[i];
if (i >= 10) h -= p * s[i - 10];
if (++cnt[h] == 2) ans.push_back(s.substr(i - 9, 10));
}
return ans;
}
};
// OJ: https://leetcode.com/problems/repeated-dna-sequences
// Author: github.com/lzl124631x
// Time: O(N)
// Space: O(N)
class Solution {
public:
vector<string> findRepeatedDnaSequences(string s) {
if (s.size() <= 10) return {};
unordered_map<int, unordered_set<string>> m;
unordered_set<string> st;
unsigned h = 0, p = 1, prime = 16777619;
for (int i = 0; i < 10; ++i) {
h = h * prime + s[i];
p *= prime;
}
m[h].insert(s.substr(0, 10));
for (int i = 10; i < s.size(); ) {
h = h * prime + s[i] - p * s[i - 10];
++i;
auto sub = s.substr(i - 10, 10);
if (m[h].count(sub)) st.insert(sub);
else m[h].insert(sub);
}
vector<string> ans(begin(st), end(st));
return ans;
}
};