mutation testing results: tests to add #6114

tdhock · 2024-05-01T17:24:38Z

Hi, @agroce and I did mutation testing of data.table. Below are some changes/mutants that we found in lines which are apparently covered by tests: when we ran tests and CRAN checks with the change/mutant applied, they still passed. These look like significant changes, so we may want to add tests for them.
This was run on 1.15.0 so the line numbers/links refer to that version of the code (first line original, second line mutated).

Not classified yet (TODO)

fifelse.c:27

if (!na_n && len3!=1 && len3!=len0)
if (!na_n && len3!=1 && len3<len0)

fmelt.c:517

int thislen = 0;
int thislen = (0+1);

forder.c:167

*out_min = min ^ 0x80000000u;  // map [-2147483648(INT32_MIN), 2147483647(INT32_MAX)] => [0, 4294967295(UINT32_MAX)]
/**out_min = min ^ 0x80000000u;  // map [-2147483648(INT32_MIN), 2147483647(INT32_MAX)] => [0, 4294967295(UINT32_MAX)]*/

forder.c:335

if (ustr3[i] == ustr3[i-1]) continue;  // use the same o for duplicates
if (ustr3[i] == ustr3[i%1]) continue;  // use the same o for duplicates

forder.c:516

range_i32(INTEGER(x), nrow, &min, &max, &na_count);
range_i32(INTEGER(x), nrow, &max, &max, &na_count);

forder.c:806

if (tmp>x[i-1]) continue;  // x[i-1]==x[i] doesn't happen because x is unique
if (tmp>x[i-1]) continue;  // x[i-1]==x[i] doesn't happen because x is unique
continue;

forder.c:869

while (third<my_n && my_key[third]==my_key[second]) third++;  // look for the last of the second value (which might be a repeat of the first)
while ((1==1) && my_key[third]==my_key[second]) third++;  // look for the last of the second value (which might be a repeat of the first)

forder.c:917

if (radix+1==nradix && !retgrp) {
if (radix-1==nradix && !retgrp) {

fread.c:355

if (ch==eof) return eof;
if (0==1) return eof;

fread.c:365

while (*ch!='\n' && *ch!='\r' && (*ch!='\0' || ch<eof)) ch++;
while (!(*ch!='\n' && *ch!='\r' && (*ch!='\0' || ch<eof))) ch++;

fread.c:572

if (ch==eof && quoteRule!=2) { target->off--; target->len++; }   // test 1324 where final field has open quote but not ending quote; include the open quote like quote rule 2
if ((1==1) && quoteRule!=2) { target->off--; target->len++; }   // test 1324 where final field has open quote but not ending quote; include the open quote like quote rule 2

fread.c:712

if (*ch!=dec && *ch!='e' && *ch!='E') goto fail;
if (*ch!=dec && *ch!='e' && *ch<='E') goto fail;

fread.c:784

int_fast8_t extra = e < 0 ? e + 300 : e - 300;
int_fast8_t extra = e < 0 ? e + 300 : e - 0;

fread.c:784

int_fast8_t extra = e < 0 ? e + 300 : e - 300;
int_fast8_t extra = e != 0 ? e + 300 : e - 300;

fread.c:835

if ((ch[0]=='q' || ch[0]=='s') && ch[1]=='N' && ch[2]=='a' && ch[3]=='N' && (ch += 4)) {
if ((ch[0]=='q' || ch[0]<='s') && ch[1]=='N' && ch[2]=='a' && ch[3]=='N' && (ch += 4)) {

fread.c:1142

if (ch[0]=='t' && ch[1]=='r' && ch[2]=='u' && ch[3]=='e') {
if (ch[0]=='t' && ch[1]=='r' && (1==1)) {

fread.c:1604

while (ch<eof && thisLine++<jumpLines) {
while (ch<eof && thisLine--<jumpLines) {

fread.c:1924

for (int j=ncol; j<tt; j++) { tmpType[j] = type[j] = type0; }
for (int j=ncol; j<=tt; j++) { tmpType[j] = type[j] = type0; }

fread.c:1979

double sd = sqrt( (sumLenSq - (sumLen*sumLen)/sampleLines)/(sampleLines-1) );
double sd = sqrt( (sumLenSq - (sumLen*sumLen)/sampleLines)/(sampleLines-(1-1)) );

fread.c:2501

if (internalErr[0]!='\0') {
if (internalErr[0]<'\0') {

freadR.c:579

if (c<strLen) {
if (c<=strLen) {

frolladaptive.c:219

bool truehasna = hasna>0;
bool truehasna = hasna>1;

frolladaptive.c:246

ans->status = 2;
/*ans->status = 2;*/

fsort.c:165

int MSBNbits = maxBit > 15 ? 16 : maxBit+1;       // how many bits make up the MSB
int MSBNbits = maxBit > 15 ? 16 : maxBit+-1;       // how many bits make up the MSB

fsort.c:223

int fromBit = toBit>7 ? toBit-7 : 0;
int fromBit = toBit>7 ? toBit-7 : (0+1);

fsort.c:247

while (MSBsize>0 && msbCounts[order[MSBsize-1]] < 2) MSBsize--;
while (MSBsize==0 && msbCounts[order[MSBsize-1]] < 2) MSBsize--;

fwrite.c:284

if (sf == 1) ch--; else *ch-- = dec;
if (0==1) ch--; else *ch-- = dec;

gsumm.c:35

int nb=0;
int nb=-1;

ijoin.c:182

if (length(tt) && length(vv)>=count[i]) {   // length check added by Matt to avoid SEGV in #2767
if (length(tt) && length(vv)>=count[i]) {   // length check added by Matt to avoid SEGV in #2767
continue;

ijoin.c:275

++wlen; ++j; ++m;
++wlen; --j; ++m;

ijoin.c:495

if (k == to[i]) {
if (k > to[i]) {

ijoin.c:607

INTEGER(f1__)[thislen] = i+1;
INTEGER(f1__)[thislen] = i+0;

ijoin.c:677

if (len == thislen && count[k-1]) {
if (len == thislen && count[k/1]) {

rbindlist.c:60

if (nrow==0 && ncol==0) return(R_NilValue);
if (nrow>=0 && ncol==0) return(R_NilValue);

rbindlist.c:107

if (thisncol==0) continue;
if (thisncol==1) continue;

rbindlist.c:160

while (wi && dupLink[w]>0) { w=dupLink[w]; --wi; }  // hop through the dups
while (wi && dupLink[w]>-1) { w=dupLink[w]; --wi; }  // hop through the dups

subset.c:181

while (i<n && (firstNeg==0 || firstNA==0)) {
while ((1==1) && (firstNeg==0 || firstNA==0)) {

subset.c:207

if (elem<1 || elem>max) continue;
if (elem<1 || elem==max) continue;

uniqlist.c:79

if (getNumericRounding_C()==0 /*default*/ || inherits(v, "integer64")) {
if (getNumericRounding_C()<0 /*default*/ || inherits(v, "integer64")) {

uniqlist.c:112

switch (TYPEOF(v)) {
switch (TYPEOF(v)) {
break;

uniqlist.c:158

if (len>0) INTEGER(ans)[len-1] = INTEGER(n)[0] - INTEGER(x)[len-1] + 1;
if (len>=0) INTEGER(ans)[len-1] = INTEGER(n)[0] - INTEGER(x)[len-1] + 1;

uniqlist.c:237

ians[i] = (grp+=!same);
ians[i] = (grp+=!same);
continue;

utils.c:72

for (int i=0; i<n; ++i) if (!ISNAN_COMPLEX(xd[i])) {
for (int i=0; i<n; ++i) if (1==1) {

utils.c:255

if (ALTREP(thiscol) || TRUELENGTH(thiscol)<0) {
if (ALTREP(thiscol) || TRUELENGTH(thiscol)!=0) {

utils.c:413 in internal fun

return ScalarInteger(i+1);
return ScalarInteger(i-1);

below to ignore (probably not significant)

bmerge.R:92 not sure what other values xclass can take but "double">="integer64" is FALSE

if (xclass=="integer64" || iclass=="integer64") {
if (xclass>="integer64" || iclass=="integer64") {

data.table.R:770 bysubl[[-1L]] is only valid in a list with two elements, so this can never fail: if length!=2 then the first condition returns TRUE; if length==2 then the second condition is valid, and index -1 (everything except the first element, i.e. the second) is equivalent to index 2.

if (!is.symbol(bysub) && (length(bysubl)!=2L || !is.symbol(bysubl[[2L]]) || !(bysubl[[1L]] %chin% c(".","c","list"))))
if (!is.symbol(bysub) && (length(bysubl)!=2L || !is.symbol(bysubl[[-1L]]) || !(bysubl[[1L]] %chin% c(".","c","list"))))

data.table.R:2490 ul argument goes into ... in either case.

r = do.call("CJ", c(ul, sorted=sorted, unique=TRUE))
r = do.call("CJ", c( sorted=sorted,ul, unique=TRUE))

data.table.R:2536 split.data.table used as FUN in either case

lapply(ll, split.data.table, drop=drop, by=by[-1L], sorted=sorted, keep.by=keep.by, flatten=flatten)
lapply(ll, drop=drop, split.data.table, by=by[-1L], sorted=sorted, keep.by=keep.by, flatten=flatten)

data.table.R:2969 next line does eval(name[[2L]]), which is probably ok to do even if name[[2L]] is not a name

} else if (name %iscall% c('$', '[[') && is.name(name[[2L]])) {
} else if (name %iscall% c('$', '[[') && is.name(name[[1L]])) {

fmelt.R:109 still works /equivalent because match.vec comes from regexpr which returns -1 for no match.

measure.vec = which(0 < match.vec)
measure.vec = which(-1 < match.vec)

merge.R:118 probably not a problem because setcolorder always puts un-mentioned columns at the end.

setcolorder(dt, c(by.y, setdiff(names(dt), c(by.y, newend)), newend))
setcolorder(dt, c(by.y, setdiff(names(dt), c(by.y, newend, newend))))

setops.R:8 by.y becomes cols in either case.

by.y = colnamesInt(y, by.y, check_dups=TRUE)
by.y = colnamesInt(y, check_dups=TRUE, by.y)

transpose.R:63 type.convert becomes x in either case.

indxs = unlist(type.convert, recursive=FALSE, use.names=FALSE)
indxs = unlist( recursive=FALSE,type.convert, use.names=FALSE)

between.c:183 probably equivalent

const bool lok=(l!=NA_STRING), uok=(u!=NA_STRING);
const bool lok=(l>NA_STRING), uok=(u!=NA_STRING);

bmerge.c:123 has #nocov

if (!isInteger(nqmaxgrpArg) || length(nqmaxgrpArg) != 1 || INTEGER(nqmaxgrpArg)[0] <= 0)
if (0==1)

bmerge.c:160 is a memory error (allGrp1 is logical/int pointer)

allGrp1[0] = TRUE;
allGrp1[-1] = TRUE;

Possibly relevant but difficult/obscure

assign.c:581 only runs when there is an index whose name does not start with __; this is covered by test 1576 (fix for #1396). The logic is in a while loop ending with indexNo++; s = CDR(s); so this mutant could probably be bad if there is more than one index (the test only has one).

indexNo++;
indexNo--;

Existing test:

# work around for issue introduced in v1.9.4, #1396
X = data.table(x=5:1, y=6:10)
setattr(X, 'index', integer(0))
setattr(attr(X, 'index'), 'x', 5:1) # auto indexed attribute as created from v1.9.4
test(1576, X[, z := 1:5, verbose=TRUE],
    output = "Dropping index 'x' as.*beginning of its name.*very likely created by v1.9.4 of data.table")

bmerge.c:69 could be tested, it!=LGLSXP should be TRUE for it>LGLSXP, but in the mutant it would be FALSE. https://github.com/r-devel/r-svn/blob/145c843da2856bf06c817b2831f4fcf9f515f2e7/src/include/Rinternals.h#L108-L141 says that LGLSXP is 10, CPLXSXP is 15 and VECSXP is 19

if (iN && it!=LGLSXP && it!=INTSXP && it!=REALSXP && it!=STRSXP)
if (iN && it<LGLSXP && it!=INTSXP && it!=REALSXP && it!=STRSXP)

potential tests to add

coalesce.c:53 this code seems like it is setting values, so maybe the values are not being tested?

switch(TYPEOF(first)) {
switch(TYPEOF(first)) {
break;

dogroups.c:345 looks like an estimate of number of rows returned in j, starts at -1, and incremented from there, so this could be an off by one?

estn = 0;
/*estn = 0;*/

fastmean.c:78 happens for REALSXP, seems like it should be important; why do the tests still pass with this change?

s += t/n;
s += t*n;

tests added

fread.R:341 added test with fread(key=character()) #6115

if (length(key) == 1L) {
if (length(key) < 1L) {

below source code

source code I used to create output above is shown below:

> rmq=function(s)gsub('""""','"',s,fixed=TRUE);mutant.dt[,suffix:=sub(".*[.]", "", file)][order(suffix,file,line)][critical==1 & software=="data.table", cat(sprintf("[%s:%d](https://github.com/Rdatatable/data.table/blob/1.15.0/%s/%s#L%d)\n```\n%s\n%s\n```\n", file,line,ifelse(suffix=="R","R","src"),file,line,rmq(original),rmq(mutated)),sep="\n")]

I will be going through these mutants and classifying them based on how bad they are, and then I will create some new test PRs. If anyone else wants to help, that would be great.

The text was updated successfully, but these errors were encountered:

tdhock self-assigned this May 1, 2024

This was referenced May 1, 2024

test fread(key=character()) #6115

Merged

R argument switching mutants are equivalent agroce/universalmutator#26

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

mutation testing results: tests to add #6114

mutation testing results: tests to add #6114

tdhock commented May 1, 2024 •

edited

mutation testing results: tests to add #6114

mutation testing results: tests to add #6114

Comments

tdhock commented May 1, 2024 • edited

Not classified yet (TODO)

below to ignore (probably not significant)

Possibly relevant but difficult/obscure

potential tests to add

tests added

below source code

tdhock commented May 1, 2024 •

edited