Skip to content

Latest commit

 

History

History

187

Folders and files

NameName
Last commit message
Last commit date

parent directory

..
 
 

The DNA sequence is composed of a series of nucleotides abbreviated as 'A', 'C', 'G', and 'T'.

  • For example, "ACGAATTCCG" is a DNA sequence.

When studying DNA, it is useful to identify repeated sequences within the DNA.

Given a string s that represents a DNA sequence, return all the 10-letter-long sequences (substrings) that occur more than once in a DNA molecule. You may return the answer in any order.

 

Example 1:

Input: s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT"
Output: ["AAAAACCCCC","CCCCCAAAAA"]

Example 2:

Input: s = "AAAAAAAAAAAAA"
Output: ["AAAAAAAAAA"]

 

Constraints:

  • 1 <= s.length <= 105
  • s[i] is either 'A', 'C', 'G', or 'T'.

Companies: LinkedIn, Amazon, Google, Bloomberg

Related Topics:
Hash Table, String, Bit Manipulation, Sliding Window, Rolling Hash, Hash Function

Solution 1. Rabin Karp

Note that this implementation is unsafe because it doesn't check whether two strings are the same when their hashes are the same.

// OJ: https://leetcode.com/problems/repeated-dna-sequences
// Author: github.com/lzl124631x
// Time: O(N)
// Space: O(4^10)
class Solution {
public:
    vector<string> findRepeatedDnaSequences(string s) {
        unordered_map<unsigned long long, int> cnt;
        vector<string> ans;
        unsigned long long h = 0, p = 1, d = 1099511628211;
        for (int i = 0, N = s.size(); i < N; ++i) {
            if (i < 10) p *= d;
            h = h * d + s[i];
            if (i >= 10) h -= p * s[i - 10];
            if (++cnt[h] == 2) ans.push_back(s.substr(i - 9, 10));
        }
        return ans;
    }
};

Solution 2. Rabin Karp

// OJ: https://leetcode.com/problems/repeated-dna-sequences
// Author: github.com/lzl124631x
// Time: O(N)
// Space: O(N)
class Solution {
public:
    vector<string> findRepeatedDnaSequences(string s) {
        if (s.size() <= 10) return {};
        unordered_map<int, unordered_set<string>> m;
        unordered_set<string> st;
        unsigned h = 0, p = 1, prime = 16777619;
        for (int i = 0; i < 10; ++i) {
            h = h * prime + s[i];
            p *= prime;
        }
        m[h].insert(s.substr(0, 10));
        for (int i = 10; i < s.size(); ) {
            h = h * prime + s[i] - p * s[i - 10];
            ++i;
            auto sub = s.substr(i - 10, 10);
            if (m[h].count(sub)) st.insert(sub);
            else m[h].insert(sub);
        }
        vector<string> ans(begin(st), end(st));
        return ans;
    }
};