-
Notifications
You must be signed in to change notification settings - Fork 42
Expand file tree
/
Copy pathR_fdep.c
More file actions
100 lines (86 loc) · 2.83 KB
/
R_fdep.c
File metadata and controls
100 lines (86 loc) · 2.83 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
/* Copyright (C) Mark van der Loo and Edwin de Jonge
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* You can contact the author at: mark _dot_ vanderloo _at_ gmail _dot_ com
*/
#include <R.h>
#include <Rdefines.h>
#include <stdint.h>
#include <string.h>
#include "sfh.h"
/*
* Check functional dependencies x -> y
*
* The FD x -> y is to be interpreted as if two
* elements of x have the same value, they shall have the
* same value for y. In the comments below we refer to 'x'
* as the condition and to 'y' as the consequent.
*
* INPUT x, y: character vectors.
* OUTPUT integer vector.
*
* If all pairs in (x,y) obey x->y then the output is seq_along(x).
* suppose that for some i < j we have x[i] == x[j] but y[i] != y[j].
* The i'th and j'th value of the output is then equal to 0.
*
*/
SEXP R_fdcheck(SEXP x, SEXP y){
PROTECT(x);
PROTECT(y);
const int hashfac = 11;
R_xlen_t n = xlength(x)
, nh = hashfac*n
, j;
SEXP out;
PROTECT(out = allocVector(INTSXP,n));
uint32_t *H = (uint32_t *) calloc(nh, sizeof(uint32_t));
R_xlen_t *E = (R_xlen_t *) malloc(nh * sizeof(R_xlen_t));
if ( H == NULL || E == NULL){
free(H); free(E);
error("Could not allocate enough memory");
}
int *I = INTEGER(out);
uint32_t a,b;
for ( R_xlen_t i = 0; i<n; i++, I++){
a = SuperFastHash(CHAR(STRING_ELT(x,i)), length(STRING_ELT(x,i)));
b = SuperFastHash(CHAR(STRING_ELT(y,i)), length(STRING_ELT(y,i)));
rehash:
j = a % nh;
if (H[j] == 0){ // new condition
H[j] = b;
E[j] = i;
(*I) = i + 1; // R-indexing
} else { // found similar condition
// In case of collision in condition: hash the hash.
if ( strcmp( CHAR(STRING_ELT(x,E[j])), CHAR(STRING_ELT(x,i)) ) != 0 ){
a = SuperFastHash((const char *) &a, 4);
goto rehash;
}
if (INTEGER(out)[E[j]] == 0){
(*I) = 0;
} else if (H[j] == b && strcmp( CHAR(STRING_ELT(y,E[j])), CHAR(STRING_ELT(y,i)) ) == 0 ){
// conseqent also similar, all good.
(*I) = i + 1;
} else { // consequent different, store conflicting index.
(*I) = 0;
INTEGER(out)[E[j]] = 0;
}
}
}
free(H);
free(E);
UNPROTECT(3);
return(out);
}