Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

mutation testing results: tests to add #6114

Open
tdhock opened this issue May 1, 2024 · 0 comments
Open

mutation testing results: tests to add #6114

tdhock opened this issue May 1, 2024 · 0 comments
Assignees

Comments

@tdhock
Copy link
Member

tdhock commented May 1, 2024

Hi, @agroce and I did mutation testing of data.table. Below are some changes/mutants that we found in lines which are apparently covered by tests: when we ran the tests and CRAN checks with each change/mutant applied, they still passed. These look like significant changes, so we may want to add tests for them.
This was run on 1.15.0 so the line numbers/links refer to that version of the code (first line original, second line mutated).

Not classified yet (TODO)

fifelse.c:27

if (!na_n && len3!=1 && len3!=len0)
if (!na_n && len3!=1 && len3<len0)

fmelt.c:517

int thislen = 0;
int thislen = (0+1);

forder.c:167

*out_min = min ^ 0x80000000u;  // map [-2147483648(INT32_MIN), 2147483647(INT32_MAX)] => [0, 4294967295(UINT32_MAX)]
/**out_min = min ^ 0x80000000u;  // map [-2147483648(INT32_MIN), 2147483647(INT32_MAX)] => [0, 4294967295(UINT32_MAX)]*/

forder.c:335

if (ustr3[i] == ustr3[i-1]) continue;  // use the same o for duplicates
if (ustr3[i] == ustr3[i%1]) continue;  // use the same o for duplicates

forder.c:516

range_i32(INTEGER(x), nrow, &min, &max, &na_count);
range_i32(INTEGER(x), nrow, &max, &max, &na_count);

forder.c:806

if (tmp>x[i-1]) continue;  // x[i-1]==x[i] doesn't happen because x is unique
if (tmp>x[i-1]) continue;  // x[i-1]==x[i] doesn't happen because x is unique
continue;

forder.c:869

while (third<my_n && my_key[third]==my_key[second]) third++;  // look for the last of the second value (which might be a repeat of the first)
while ((1==1) && my_key[third]==my_key[second]) third++;  // look for the last of the second value (which might be a repeat of the first)

forder.c:917

if (radix+1==nradix && !retgrp) {
if (radix-1==nradix && !retgrp) {

fread.c:355

if (ch==eof) return eof;
if (0==1) return eof;

fread.c:365

while (*ch!='\n' && *ch!='\r' && (*ch!='\0' || ch<eof)) ch++;
while (!(*ch!='\n' && *ch!='\r' && (*ch!='\0' || ch<eof))) ch++;

fread.c:572

if (ch==eof && quoteRule!=2) { target->off--; target->len++; }   // test 1324 where final field has open quote but not ending quote; include the open quote like quote rule 2
if ((1==1) && quoteRule!=2) { target->off--; target->len++; }   // test 1324 where final field has open quote but not ending quote; include the open quote like quote rule 2

fread.c:712

if (*ch!=dec && *ch!='e' && *ch!='E') goto fail;
if (*ch!=dec && *ch!='e' && *ch<='E') goto fail;

fread.c:784

int_fast8_t extra = e < 0 ? e + 300 : e - 300;
int_fast8_t extra = e < 0 ? e + 300 : e - 0;

fread.c:784

int_fast8_t extra = e < 0 ? e + 300 : e - 300;
int_fast8_t extra = e != 0 ? e + 300 : e - 300;

fread.c:835

if ((ch[0]=='q' || ch[0]=='s') && ch[1]=='N' && ch[2]=='a' && ch[3]=='N' && (ch += 4)) {
if ((ch[0]=='q' || ch[0]<='s') && ch[1]=='N' && ch[2]=='a' && ch[3]=='N' && (ch += 4)) {

fread.c:1142

if (ch[0]=='t' && ch[1]=='r' && ch[2]=='u' && ch[3]=='e') {
if (ch[0]=='t' && ch[1]=='r' && (1==1)) {

fread.c:1604

while (ch<eof && thisLine++<jumpLines) {
while (ch<eof && thisLine--<jumpLines) {

fread.c:1924

for (int j=ncol; j<tt; j++) { tmpType[j] = type[j] = type0; }
for (int j=ncol; j<=tt; j++) { tmpType[j] = type[j] = type0; }

fread.c:1979

double sd = sqrt( (sumLenSq - (sumLen*sumLen)/sampleLines)/(sampleLines-1) );
double sd = sqrt( (sumLenSq - (sumLen*sumLen)/sampleLines)/(sampleLines-(1-1)) );

fread.c:2501

if (internalErr[0]!='\0') {
if (internalErr[0]<'\0') {

freadR.c:579

if (c<strLen) {
if (c<=strLen) {

frolladaptive.c:219

bool truehasna = hasna>0;
bool truehasna = hasna>1;

frolladaptive.c:246

ans->status = 2;
/*ans->status = 2;*/

fsort.c:165

int MSBNbits = maxBit > 15 ? 16 : maxBit+1;       // how many bits make up the MSB
int MSBNbits = maxBit > 15 ? 16 : maxBit+-1;       // how many bits make up the MSB

fsort.c:223

int fromBit = toBit>7 ? toBit-7 : 0;
int fromBit = toBit>7 ? toBit-7 : (0+1);

fsort.c:247

while (MSBsize>0 && msbCounts[order[MSBsize-1]] < 2) MSBsize--;
while (MSBsize==0 && msbCounts[order[MSBsize-1]] < 2) MSBsize--;

fwrite.c:284

if (sf == 1) ch--; else *ch-- = dec;
if (0==1) ch--; else *ch-- = dec;

gsumm.c:35

int nb=0;
int nb=-1;

ijoin.c:182

if (length(tt) && length(vv)>=count[i]) {   // length check added by Matt to avoid SEGV in #2767
if (length(tt) && length(vv)>=count[i]) {   // length check added by Matt to avoid SEGV in #2767
continue;

ijoin.c:275

++wlen; ++j; ++m;
++wlen; --j; ++m;

ijoin.c:495

if (k == to[i]) {
if (k > to[i]) {

ijoin.c:607

INTEGER(f1__)[thislen] = i+1;
INTEGER(f1__)[thislen] = i+0;

ijoin.c:677

if (len == thislen && count[k-1]) {
if (len == thislen && count[k/1]) {

rbindlist.c:60

if (nrow==0 && ncol==0) return(R_NilValue);
if (nrow>=0 && ncol==0) return(R_NilValue);

rbindlist.c:107

if (thisncol==0) continue;
if (thisncol==1) continue;

rbindlist.c:160

while (wi && dupLink[w]>0) { w=dupLink[w]; --wi; }  // hop through the dups
while (wi && dupLink[w]>-1) { w=dupLink[w]; --wi; }  // hop through the dups

subset.c:181

while (i<n && (firstNeg==0 || firstNA==0)) {
while ((1==1) && (firstNeg==0 || firstNA==0)) {

subset.c:207

if (elem<1 || elem>max) continue;
if (elem<1 || elem==max) continue;

uniqlist.c:79

if (getNumericRounding_C()==0 /*default*/ || inherits(v, "integer64")) {
if (getNumericRounding_C()<0 /*default*/ || inherits(v, "integer64")) {

uniqlist.c:112

switch (TYPEOF(v)) {
switch (TYPEOF(v)) {
break;

uniqlist.c:158

if (len>0) INTEGER(ans)[len-1] = INTEGER(n)[0] - INTEGER(x)[len-1] + 1;
if (len>=0) INTEGER(ans)[len-1] = INTEGER(n)[0] - INTEGER(x)[len-1] + 1;

uniqlist.c:237

ians[i] = (grp+=!same);
ians[i] = (grp+=!same);
continue;

utils.c:72

for (int i=0; i<n; ++i) if (!ISNAN_COMPLEX(xd[i])) {
for (int i=0; i<n; ++i) if (1==1) {

utils.c:255

if (ALTREP(thiscol) || TRUELENGTH(thiscol)<0) {
if (ALTREP(thiscol) || TRUELENGTH(thiscol)!=0) {

utils.c:413 in internal fun

return ScalarInteger(i+1);
return ScalarInteger(i-1);

below to ignore (probably not significant)

bmerge.R:92 not sure what other values xclass can take but "double">="integer64" is FALSE

if (xclass=="integer64" || iclass=="integer64") {
if (xclass>="integer64" || iclass=="integer64") {

data.table.R:770 bysubl[[-1L]] is only valid in a list with two elements, so this can never fail: if length!=2 then the first condition returns TRUE, and if length==2 then the second condition is valid and -1 (not first = second) is equivalent to 2.

if (!is.symbol(bysub) && (length(bysubl)!=2L || !is.symbol(bysubl[[2L]]) || !(bysubl[[1L]] %chin% c(".","c","list"))))
if (!is.symbol(bysub) && (length(bysubl)!=2L || !is.symbol(bysubl[[-1L]]) || !(bysubl[[1L]] %chin% c(".","c","list"))))

data.table.R:2490 ul argument goes into ... in either case.

r = do.call("CJ", c(ul, sorted=sorted, unique=TRUE))
r = do.call("CJ", c( sorted=sorted,ul, unique=TRUE))

data.table.R:2536 split.data.table used as FUN in either case

lapply(ll, split.data.table, drop=drop, by=by[-1L], sorted=sorted, keep.by=keep.by, flatten=flatten)
lapply(ll, drop=drop, split.data.table, by=by[-1L], sorted=sorted, keep.by=keep.by, flatten=flatten)

data.table.R:2969 next line does eval(name[[2L]] which is probably ok to do even if name[[2]] is not a name

} else if (name %iscall% c('$', '[[') && is.name(name[[2L]])) {
} else if (name %iscall% c('$', '[[') && is.name(name[[1L]])) {

fmelt.R:109 still works (equivalent) because match.vec comes from regexpr, which returns -1 for no match.

measure.vec = which(0 < match.vec)
measure.vec = which(-1 < match.vec)

merge.R:118 probably not a problem because setcolorder always puts un-mentioned columns at the end.

setcolorder(dt, c(by.y, setdiff(names(dt), c(by.y, newend)), newend))
setcolorder(dt, c(by.y, setdiff(names(dt), c(by.y, newend, newend))))

setops.R:8 by.y becomes cols in either case.

by.y = colnamesInt(y, by.y, check_dups=TRUE)
by.y = colnamesInt(y, check_dups=TRUE, by.y)

transpose.R:63 type.convert becomes x in either case.

indxs = unlist(type.convert, recursive=FALSE, use.names=FALSE)
indxs = unlist( recursive=FALSE,type.convert, use.names=FALSE)

between.c:183 probably equivalent

const bool lok=(l!=NA_STRING), uok=(u!=NA_STRING);
const bool lok=(l>NA_STRING), uok=(u!=NA_STRING);

bmerge.c:123 has #nocov

if (!isInteger(nqmaxgrpArg) || length(nqmaxgrpArg) != 1 || INTEGER(nqmaxgrpArg)[0] <= 0)
if (0==1)

bmerge.c:160 is a memory error (allGrp1 is logical/int pointer)

allGrp1[0] = TRUE;
allGrp1[-1] = TRUE;

Possibly relevant but difficult/obscure

assign.c:581 only runs when there is an index whose name does not start with __; this is covered by test 1576 (fix for #1396). The logic is in a while loop ending with indexNo ++; s = CDR(s); so the mutant could probably misbehave if there is more than one index (the test only has one).

indexNo++;
indexNo--;

Existing test:

# work around for issue introduced in v1.9.4, #1396
X = data.table(x=5:1, y=6:10)
setattr(X, 'index', integer(0))
setattr(attr(X, 'index'), 'x', 5:1) # auto indexed attribute as created from v1.9.4
test(1576, X[, z := 1:5, verbose=TRUE],
    output = "Dropping index 'x' as.*beginning of its name.*very likely created by v1.9.4 of data.table")

bmerge.c:69 could be tested, it!=LGLSXP should be TRUE for it>LGLSXP, but in the mutant it would be FALSE. https://github.com/r-devel/r-svn/blob/145c843da2856bf06c817b2831f4fcf9f515f2e7/src/include/Rinternals.h#L108-L141 says that LGLSXP is 10, CPLXSXP is 15 and VECSXP is 19

if (iN && it!=LGLSXP && it!=INTSXP && it!=REALSXP && it!=STRSXP)
if (iN && it<LGLSXP && it!=INTSXP && it!=REALSXP && it!=STRSXP)

potential tests to add

coalesce.c:53 this code seems like it is setting values, so maybe the values are not being tested?

switch(TYPEOF(first)) {
switch(TYPEOF(first)) {
break;

dogroups.c:345 looks like an estimate of number of rows returned in j, starts at -1, and incremented from there, so this could be an off by one?

estn = 0;
/*estn = 0;*/

fastmean.c:78 happens for REALSXP and seems like it should be important; why do tests still pass with this change?

s += t/n;
s += t*n;

tests added

fread.R:341 added test with fread(key=character()) #6115

if (length(key) == 1L) {
if (length(key) < 1L) {

below source code

source code I used to create output above is shown below:

> rmq=function(s)gsub('""""','"',s,fixed=TRUE);mutant.dt[,suffix:=sub(".*[.]", "", file)][order(suffix,file,line)][critical==1 & software=="data.table", cat(sprintf("[%s:%d](https://github.com/Rdatatable/data.table/blob/1.15.0/%s/%s#L%d)\n```\n%s\n%s\n```\n", file,line,ifelse(suffix=="R","R","src"),file,line,rmq(original),rmq(mutated)),sep="\n")]

I will be going through these mutants and classifying them based on how bad they are, and then I will create some new test PRs. If anyone else wants to help, that would be great.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant