Algorithm
Problem Name: 187. Repeated DNA Sequences
The DNA sequence is composed of a series of nucleotides abbreviated as 'A'
, 'C'
, 'G'
, and 'T'
.
- For example,
"ACGAATTCCG"
is a DNA sequence.
When studying DNA, it is useful to identify repeated sequences within the DNA.
Given a string s
that represents a DNA sequence, return all the 10
-letter-long sequences (substrings) that occur more than once in a DNA molecule. You may return the answer in any order.
Example 1:
Input: s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT" Output: ["AAAAACCCCC","CCCCCAAAAA"]
Example 2:
Input: s = "AAAAAAAAAAAAA" Output: ["AAAAAAAAAA"]
Constraints:
1 <= s.length <= 105
s[i]
is either'A'
,'C'
,'G'
, or'T'
.
Code Examples
#1 Code Example with C Programming
Code -
C Programming
typedef struct sn {
//char *s;
int found;
unsigned int c;
struct sn *shadow;
} sn_t;
#define HF 1000
int map[26] = { 0 };
unsigned int code(const char *s) {
int i, c;
//c = 5381;
c = 0;
for (i = 0; i < 10; i ++) {
// c = c * 33 + s[i];
c = (c << 2) | map[s[i] - 'A'];
}
return c;
}
char** findRepeatedDnaSequences(const char* s, int* returnSize) {
sn_t *patt[HF] = { 0 }, *e, *shadow;
unsigned int c;
char **p, *buff;
const char *x;
int sz, n, len;
map['A' - 'A'] = 0x0;
map['C' - 'A'] = 0x1;
map['G' - 'A'] = 0x2;
map['T' - 'A'] = 0x3;
sz = 10; n = 0;
p = malloc(sz * sizeof(char *));
//assert(p);
len = strlen(s);
while (*s && len >= 10) {
c = code(s);
e = patt[c % HF];
while (e && e->c != c) {
e = e->shadow;
}
if (!e) { // not yet being searched
e = malloc(sizeof(sn_t));
//assert(e);
//e->s = s;
e->found = 0;
e->c = c;
e->shadow = patt[c % HF]; // save in searched table
patt[c % HF] = e;
} else if (!e->found) { // this is a repeated pattern
e->found = 1;
if (n == sz) {
sz *= 2;
p = realloc(p, sz * sizeof(char *));
//assert(p);
}
buff = malloc(11 * sizeof(char));
//assert(buff);
strncpy(buff, s, 10);
buff[10] = 0;
p[n ++] = buff;
}
s ++; len --;
}
*returnSize = n;
for (n = 0; n < HF; n ++) {
e = patt[n];
while (e) {
shadow = e->shadow;
free(e);
e = shadow;
}
}
return p;
}
Copy The Code &
Try With Live Editor
Input
Output
#2 Code Example with Java Programming
Code -
Java Programming
class Solution {
public List findRepeatedDnaSequences(String s) {
Set seenSequences = new HashSet<>();
Set < String> duplicateSequences = new HashSet<>();
for (int idx = 0; idx < = s.length() - 10; idx++) {
String currentSequence = s.substring(idx, idx + 10);
if (!seenSequences.add(currentSequence)) {
duplicateSequences.add(currentSequence);
}
}
return new ArrayList < >(duplicateSequences);
}
}
Copy The Code &
Try With Live Editor
Input
Output
#3 Code Example with Javascript Programming
Code -
Javascript Programming
const findRepeatedDnaSequences = function(s) {
if (!s || s.length < 10) {
return [];
}
const map = new Map([["A", 0], ["C", 1], ["G", 2], ["T", 3]]);
const dna = new Set();
const repeated = new Set();
const mask = 0xfffff;
let cur = 0;
for (let i = 0, n = s.length; i < n; i++) {
cur <<= 2;
cur = cur | map.get(s[i]);
cur = cur & mask;
if (i >= 9) {
if (dna.has(cur)) {
const seq = s.slice(i - 9, i + 1);
if (!repeated.has(seq)) {
repeated.add(seq);
}
} else {
dna.add(cur);
}
}
}
return Array.from(repeated);
};
Copy The Code &
Try With Live Editor
Input
Output
#4 Code Example with Python Programming
Code -
Python Programming
class Solution:
def findRepeatedDnaSequences(self, s):
"""
:type s: str
:rtype: List[str]
"""
dic, str = {}, "x" + s[:9]
for i in range(9, len(s)):
str = str[1:] + s[i]
dic[str] = 1 if str not in dic else dic[str] + 1
return [k for k, v in dic.items() if v > 1]
Copy The Code &
Try With Live Editor
Input
Output