-
Notifications
You must be signed in to change notification settings - Fork 27
/
187. Repeated DNA Sequences.c
108 lines (97 loc) · 2.7 KB
/
187. Repeated DNA Sequences.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
/*
187. Repeated DNA Sequences
All DNA is composed of a series of nucleotides abbreviated as A, C, G, and T, for example: "ACGAATTCCG". When studying DNA, it is sometimes useful to identify repeated sequences within the DNA.
Write a function to find all the 10-letter-long sequences (substrings) that occur more than once in a DNA molecule.
For example,
Given s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT",
Return:
["AAAAACCCCC", "CCCCCAAAAA"].
*/
/**
* Return an array of size *returnSize.
* Note: The returned array must be malloced, assume caller calls free().
*/
typedef struct sn {
//char *s;
int found;
unsigned int c;
struct sn *shadow;
} sn_t;
#define HF 1000
int map[26] = { 0 };
unsigned int code(const char *s) {
int i, c;
//c = 5381;
c = 0;
for (i = 0; i < 10; i ++) {
// c = c * 33 + s[i];
c = (c << 2) | map[s[i] - 'A'];
}
return c;
}
char** findRepeatedDnaSequences(const char* s, int* returnSize) {
sn_t *patt[HF] = { 0 }, *e, *shadow;
unsigned int c;
char **p, *buff;
const char *x;
int sz, n, len;
map['A' - 'A'] = 0x0;
map['C' - 'A'] = 0x1;
map['G' - 'A'] = 0x2;
map['T' - 'A'] = 0x3;
sz = 10; n = 0;
p = malloc(sz * sizeof(char *));
//assert(p);
len = strlen(s);
while (*s && len >= 10) {
c = code(s);
e = patt[c % HF];
while (e && e->c != c) {
e = e->shadow;
}
if (!e) { // not yet being searched
e = malloc(sizeof(sn_t));
//assert(e);
//e->s = s;
e->found = 0;
e->c = c;
e->shadow = patt[c % HF]; // save in searched table
patt[c % HF] = e;
} else if (!e->found) { // this is a repeated pattern
e->found = 1;
if (n == sz) {
sz *= 2;
p = realloc(p, sz * sizeof(char *));
//assert(p);
}
buff = malloc(11 * sizeof(char));
//assert(buff);
strncpy(buff, s, 10);
buff[10] = 0;
p[n ++] = buff;
}
s ++; len --;
}
*returnSize = n;
for (n = 0; n < HF; n ++) {
e = patt[n];
while (e) {
shadow = e->shadow;
free(e);
e = shadow;
}
}
return p;
}
/*
Difficulty:Medium
Total Accepted:78.5K
Total Submissions:249.6K
Companies LinkedIn
Related Topics Hash Table Bit Manipulation
*/