Algorithm


Problem Name: 187. Repeated DNA Sequences

Problem Link: https://leetcode.com/problems/repeated-dna-sequences/

The DNA sequence is composed of a series of nucleotides abbreviated as 'A', 'C', 'G', and 'T'.

  • For example, "ACGAATTCCG" is a DNA sequence.

When studying DNA, it is useful to identify repeated sequences within the DNA.

Given a string s that represents a DNA sequence, return all the 10-letter-long sequences (substrings) that occur more than once in a DNA molecule. You may return the answer in any order.

 

Example 1:

Input: s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT"
Output: ["AAAAACCCCC","CCCCCAAAAA"]

Example 2:

Input: s = "AAAAAAAAAAAAA"
Output: ["AAAAAAAAAA"]

 

Constraints:

  • 1 <= s.length <= 105
  • s[i] is either 'A', 'C', 'G', or 'T'.
 
 

Code Examples

#1 Code Example with C Programming

Code - C Programming


typedef struct sn {
    //char *s;
    int found;
    unsigned int c;
    struct sn *shadow;
} sn_t;
#define HF 1000
int map[26] = { 0 };
unsigned int code(const char *s) {
    int i, c;
    //c = 5381;
    c = 0;
    for (i = 0; i  <  10; i ++) {
    //    c = c * 33 + s[i];
        c = (c << 2) | map[s[i] - 'A'];
    }
    return c;
}
char** findRepeatedDnaSequences(const char* s, int* returnSize) {
    sn_t *patt[HF] = { 0 }, *e, *shadow;
    unsigned int c;
    char **p, *buff;
    const char *x;
    int sz, n, len;
    
    map['A' - 'A'] = 0x0;
    map['C' - 'A'] = 0x1;
    map['G' - 'A'] = 0x2;
    map['T' - 'A'] = 0x3;
    
    sz = 10; n = 0;
    p = malloc(sz * sizeof(char *));
    //assert(p);
    len = strlen(s);
    while (*s && len >= 10) {
        c = code(s);
        e = patt[c % HF];
        while (e && e->c != c) {
            e = e->shadow;
        }
        if (!e) {  // not yet being searched
            e = malloc(sizeof(sn_t));
            //assert(e);
            //e->s = s;
            e->found = 0;
            e->c = c;
            e->shadow = patt[c % HF];  // save in searched table
            patt[c % HF] = e;
        } else if (!e->found) {  // this is a repeated pattern
            e->found = 1;
            if (n == sz) {
                sz *= 2;
                p = realloc(p, sz * sizeof(char *));
                //assert(p);
            }
            buff = malloc(11 * sizeof(char));
            //assert(buff);
            strncpy(buff, s, 10);
            buff[10] = 0;
            p[n ++] = buff;
        }
        s ++; len --;
    }
    
    *returnSize = n;
​
    for (n = 0; n  <  HF; n ++) {
        e = patt[n];
        while (e) {
            shadow = e->shadow;
            free(e);
            e = shadow;
        }
    }
​
    return p;
}
Copy The Code & Try With Live Editor

Input

x
+
cmd
s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT"

Output

x
+
cmd
["AAAAACCCCC","CCCCCAAAAA"]

#2 Code Example with Java Programming

Code - Java Programming


class Solution {
  public List findRepeatedDnaSequences(String s) {
    Set seenSequences = new HashSet<>();
    Set < String> duplicateSequences = new HashSet<>();
    for (int idx = 0; idx  < = s.length() - 10; idx++) {
      String currentSequence = s.substring(idx, idx + 10);
      if (!seenSequences.add(currentSequence)) {
        duplicateSequences.add(currentSequence);
      }
    }
    return new ArrayList < >(duplicateSequences);
  }
}
Copy The Code & Try With Live Editor

Input

x
+
cmd
s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT"

Output

x
+
cmd
["AAAAACCCCC","CCCCCAAAAA"]

#3 Code Example with Javascript Programming

Code - Javascript Programming


const findRepeatedDnaSequences = function(s) {
  if (!s || s.length < 10) {
    return [];
  }
  const map = new Map([["A", 0], ["C", 1], ["G", 2], ["T", 3]]);
  const dna = new Set();
  const repeated = new Set();
  const mask = 0xfffff;
  let cur = 0;
  for (let i = 0, n = s.length; i  <  n; i++) {
    cur <<= 2;
    cur = cur | map.get(s[i]);
    cur = cur & mask;
    if (i >= 9) {
      if (dna.has(cur)) {
        const seq = s.slice(i - 9, i + 1);
        if (!repeated.has(seq)) {
          repeated.add(seq);
        }
      } else {
        dna.add(cur);
      }
    }
  }
  return Array.from(repeated);
};
Copy The Code & Try With Live Editor

Input

x
+
cmd
s = "AAAAAAAAAAAAA"

Output

x
+
cmd
["AAAAAAAAAA"]

#4 Code Example with Python Programming

Code - Python Programming


class Solution:
    def findRepeatedDnaSequences(self, s):
        """
        :type s: str
        :rtype: List[str]
        """
        dic, str = {}, "x" + s[:9]
        for i in range(9, len(s)):
            str = str[1:] + s[i]
            dic[str] = 1 if str not in dic else dic[str] + 1
        return [k for k, v in dic.items() if v > 1]
Copy The Code & Try With Live Editor

Input

x
+
cmd
s = "AAAAAAAAAAAAA"

Output

x
+
cmd
["AAAAAAAAAA"]
Advertisements

Demonstration


Previous
#185 Leetcode Department Top Three Salaries Solution in SQL Leetcode
Next
#188 Leetcode Best Time to Buy and Sell Stock IV Solution in C, C++, Java, JavaScript, Python, C# Leetcode