Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
17cebc0
Update workflow to avoid relying on external PAT (secrets.GH_TOKEN) a…
villegar Sep 20, 2025
2ddfebf
Update with released 6.3.4
StuartWheater Sep 21, 2025
3563df7
Merge pull request #428 from villegar/v6.3.5-dev
villegar Sep 21, 2025
43398e8
feat: mdPatterns fct
ESCRI11 Oct 27, 2025
987fbcc
add mdPatterns to DATASHIELD
ESCRI11 Oct 27, 2025
16e22aa
Merge pull request #438 from ESCRI11/dev-task-14
StuartWheater Nov 2, 2025
eda4bdc
Initial 'mdPatternDS' tests
StuartWheater Nov 3, 2025
2e87305
Merge pull request #439 from StuartWheater/v6.3.5-dev
StuartWheater Nov 3, 2025
e499d8d
Additional mdPattern tests
StuartWheater Nov 3, 2025
3ba7b4c
Merge pull request #440 from StuartWheater/v6.3.5-dev
StuartWheater Nov 3, 2025
2485816
Increased data 'mdPatternDS' tests
StuartWheater Nov 3, 2025
cf61c2b
Added 'set.standard.disclosure.settings()'
StuartWheater Nov 4, 2025
e80c23f
Merge branch 'v6.3.5-dev' of github.com:StuartWheater/dsBase into v6.…
StuartWheater Nov 4, 2025
da36ab8
Updated 'mdPatternDS' tests
StuartWheater Nov 4, 2025
40e6425
Merge pull request #441 from StuartWheater/v6.3.5-dev
StuartWheater Nov 4, 2025
9b0fec1
Fix version
StuartWheater Nov 21, 2025
2a3b3fe
Update to docs
StuartWheater Nov 21, 2025
81f5f64
Remove nightly scheduled run and update call to parse_test_report.R
villegar Nov 26, 2025
e0d234f
Add session_info_*.txt as one of the log outputs and avoid storing du…
villegar Nov 26, 2025
42efdb6
Update 'perf' support
StuartWheater Nov 30, 2025
7c138f8
Merge branch 'v6.3.5-dev' of github.com:StuartWheater/dsBase into v6.…
StuartWheater Nov 30, 2025
27e5a1f
Merge pull request #446 from StuartWheater/v6.3.5-dev
StuartWheater Nov 30, 2025
998482c
Minor docs update
StuartWheater Nov 30, 2025
b335757
Merge branch 'v6.3.5-dev' of github.com:StuartWheater/dsBase into v6.…
StuartWheater Nov 30, 2025
5ae71ec
Merge pull request #447 from StuartWheater/v6.3.5-dev
StuartWheater Dec 1, 2025
bcfb6ae
Reworking of performance profiles
StuartWheater Jan 6, 2026
5af03c3
Merge branch 'v6.3.5-dev' of github.com:StuartWheater/dsBase into v6.…
StuartWheater Jan 6, 2026
2b201d6
Rework setting of variable
StuartWheater Jan 6, 2026
079a067
Merge pull request #449 from StuartWheater/v6.3.5-dev
StuartWheater Jan 6, 2026
9e3892d
Fixed Type
StuartWheater Jan 7, 2026
ee8d845
Merge branch 'datashield:v6.3.5-dev' into v6.3.5-dev
StuartWheater Jan 7, 2026
e21530a
Merge pull request #450 from StuartWheater/v6.3.5-dev
StuartWheater Jan 7, 2026
56ee2f1
Update test schedule
StuartWheater Feb 5, 2026
8665845
Merge branch 'v6.3.5-dev' of github.com:StuartWheater/dsBase into v6.…
StuartWheater Feb 5, 2026
05c8de2
Update test schedule
StuartWheater Feb 10, 2026
8c83782
Update version
StuartWheater Feb 20, 2026
69d4bb4
Update to glmSLMADS.assign
StuartWheater Feb 20, 2026
2a4a349
Update to documents
StuartWheater Feb 20, 2026
d79c7d2
Removed checking of 'opal'
StuartWheater Feb 20, 2026
1aa5c13
Merge pull request #458 from StuartWheater/v6.3.5-dev
StuartWheater Feb 22, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 23 additions & 30 deletions .github/workflows/dsBase_test_suite.yaml
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ on:
push:
schedule:
- cron: '0 0 * * 0' # Weekly
- cron: '0 1 * * *' # Nightly

jobs:
dsBase_test_suite:
Expand All @@ -37,6 +36,7 @@ jobs:
BRANCH_NAME: ${{ github.ref_name }}
REPO_OWNER: ${{ github.repository_owner }}
R_KEEP_PKG_SOURCE: yes
GITHUB_TOKEN: ${{ github.token || 'placeholder-token' }}

steps:
- name: Checkout dsBase
Expand All @@ -45,12 +45,14 @@ jobs:
path: dsBase

- name: Checkout testStatus
if: ${{ github.actor != 'nektos/act' }} # for local deployment only
uses: actions/checkout@v4
with:
repository: ${{ env.REPO_OWNER }}/testStatus
token: ${{ secrets.GH_TOKEN }}
ref: master
path: testStatus
persist-credentials: false
token: ${{ env.GITHUB_TOKEN }}

- uses: r-lib/actions/setup-pandoc@v2

Expand Down Expand Up @@ -150,50 +152,41 @@ jobs:
echo "branch:${{ env.BRANCH_NAME }}" > ${{ env.WORKFLOW_ID }}.txt
echo "os:$(lsb_release -ds)" >> ${{ env.WORKFLOW_ID }}.txt
echo "R:$(R --version | head -n1)" >> ${{ env.WORKFLOW_ID }}.txt
Rscript --vanilla -e 'sessionInfo()' >> session_info_${{ env.WORKFLOW_ID }}.txt
working-directory: dsBase/logs

- name: Parse results from testthat and covr
run: |
Rscript --verbose --vanilla ../testStatus/source/parse_test_report.R logs/
Rscript --verbose --vanilla ../testStatus/source/parse_test_report.R logs/ logs/ https://github.com/datashield/${{ env.PROJECT_NAME }}/blob/${{ env.BRANCH_NAME }} '[^-:.]+' '(?<=::)[^:]+(?=::)'
working-directory: dsBase

- name: Commit results to testStatus
env:
PROJECT_NAME: ${{ env.PROJECT_NAME }}
BRANCH_NAME: ${{ env.BRANCH_NAME }}

- name: Render report
run: |
git config --global user.email "github-actions[bot]@users.noreply.github.com"
git config --global user.name "github-actions[bot]"
cd testStatus

# Reconfigure remote to use GitHub token for authentication
git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ env.REPO_OWNER }}/testStatus.git
git checkout master
git pull origin master

mkdir -p logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
mkdir -p docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
mkdir -p docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/latest/
# clear the latest directory
rm -rf docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/latest/*
mkdir -p new/logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
mkdir -p new/docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/latest/

# Copy logs to new logs directory location
cp -rv ../dsBase/logs/* logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
cp -rv ../dsBase/logs/${{ env.WORKFLOW_ID }}.txt logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/

# Create symbolic links
ln -sf ${{ env.WORKFLOW_ID }}/ logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/.LATEST
# ln -sf docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/ docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/latest
cp -rv ../${{ env.PROJECT_NAME }}/logs/* new/logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/
cp -rv ../${{ env.PROJECT_NAME }}/logs/${{ env.WORKFLOW_ID }}.txt new/logs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/

R -e 'input_dir <- file.path("../logs", Sys.getenv("PROJECT_NAME"), Sys.getenv("BRANCH_NAME"), Sys.getenv("WORKFLOW_ID")); quarto::quarto_render("source/test_report.qmd", execute_params = list(input_dir = input_dir))'
mv source/test_report.html docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/index.html
cp -r docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/${{ env.WORKFLOW_ID }}/* docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/latest

git add .
git commit -m "Auto test for ${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }} @ ${{ env.WORKFLOW_ID }}" || echo "No changes to commit"
git push origin master
R -e 'input_dir <- file.path("../new/logs", Sys.getenv("PROJECT_NAME"), Sys.getenv("BRANCH_NAME"), Sys.getenv("WORKFLOW_ID")); quarto::quarto_render("source/test_report.qmd", execute_params = list(input_dir = input_dir))'
mv source/test_report.html new/docs/${{ env.PROJECT_NAME }}/${{ env.BRANCH_NAME }}/latest/index.html

env:
PROJECT_NAME: ${{ env.PROJECT_NAME }}
BRANCH_NAME: ${{ env.BRANCH_NAME }}
WORKFLOW_ID: ${{ env.WORKFLOW_ID }}

- name: Upload test logs
uses: actions/upload-artifact@v4
with:
name: dsbase-logs
path: testStatus/new

- name: Dump environment info
run: |
Expand Down
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ Description: Base 'DataSHIELD' functions for the server side. 'DataSHIELD' is a
been designed to only share non disclosive summary statistics, with built in automated output
checking based on statistical disclosure control. With data sites setting the threshold values for
the automated output checks. For more details, see 'citation("dsBase")'.
Version: 6.3.4
Version: 6.3.5
Authors@R: c(person(given = "Paul",
family = "Burton",
role = c("aut"),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ export(matrixDimnamesDS)
export(matrixInvertDS)
export(matrixMultDS)
export(matrixTransposeDS)
export(mdPatternDS)
export(meanDS)
export(meanSdGpDS)
export(mergeDS)
Expand Down
41 changes: 13 additions & 28 deletions R/glmSLMADS.assign.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,40 +18,25 @@
#' @export
glmSLMADS.assign <- function(formula, family, offsetName, weightsName, dataName){

#############################################################
#MODULE 1: CAPTURE THE nfilter SETTINGS #
thr <- dsBase::listDisclosureSettingsDS() #
nfilter.tab <- as.numeric(thr$nfilter.tab) #
nfilter.glm <- as.numeric(thr$nfilter.glm) #
#nfilter.subset<-as.numeric(thr$nfilter.subset) #
#nfilter.string<-as.numeric(thr$nfilter.string) #
#############################################################
# Convert transmitable text for special link variance combinations back to full representation
if(family=="quasigamma.link_log")
{family<-"quasi(link=log,variance=mu^2)"}

########################################
############
#Convert transmitable text for special link variance combinations back to full representation
if(family=="quasigamma.link_log")
{family<-"quasi(link=log,variance=mu^2)"}
if(family=="Gamma.link_log")
{family<-"Gamma(link=log)"}

if(family=="Gamma.link_log")
{family<-"Gamma(link=log)"}
#############
# Correctly name offset, weights and data objects in function call
# (to allow glmPredict to work correctly later)
calltext <- paste0("mg<-glm(formula,family=",family,",offset=",
offsetName,",weights=",weightsName,",data=", dataName,",x=TRUE)")

#Activate family object (this may not be necessary as character string may already be OK
#but just checking
final.family.object<-eval(parse(text=family))
eval(parse(text=calltext))

# update the call object to include the actual formula
mg$call$formula <- formula

#Correctly name offset, weights and data objects in function call
#(to allow glmPredict to work correctly later)
calltext<-paste0("mg<-glm(formula,family=",family,",offset=",
offsetName,",weights=",weightsName,",data=", dataName,",x=TRUE)")

eval(parse(text=calltext))

return(mg)
return(mg)

}

# ASSIGN FUNCTION
# glmSLMADS.assign
121 changes: 121 additions & 0 deletions R/mdPatternDS.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
#'
#' @title Missing data pattern with disclosure control
#' @description This function is a serverside aggregate function that computes the
#' missing data pattern using mice::md.pattern and applies disclosure control to
#' prevent revealing small cell counts.
#' @details This function calls the mice::md.pattern function to generate a matrix
#' showing the missing data patterns in the input data. To ensure disclosure control,
#' any pattern counts that are below the threshold (nfilter.tab) are suppressed.
#'
#' \strong{Suppression Method:}
#'
#' When a pattern count is below threshold:
#' - Row name is changed to "suppressed(<N>)" where N is the threshold
#' - All pattern values in that row are set to NA
#' - Summary row is also set to NA (prevents back-calculation)
#'
#' \strong{Output Matrix Structure:}
#'
#' - Rows represent different missing data patterns (plus a summary row at the bottom)
#' - Row names contain pattern counts (or "suppressed(<N>)" for invalid patterns)
#' - Columns show 1 if variable is observed, 0 if missing
#' - Last column shows total number of missing values per pattern
#' - Last row shows total number of missing values per variable
#'
#' \strong{Note for Pooling:}
#'
#' When this function is called from ds.mdPattern with type='combine', suppressed
#' patterns are excluded from pooling to prevent disclosure through subtraction.
#' This means pooled counts may underestimate the true total when patterns are
#' suppressed in some studies.
#'
#' @param x a character string specifying the name of a data frame or matrix
#' containing the data to analyze for missing patterns.
#' @return A list containing:
#' \item{pattern}{The missing data pattern matrix with disclosure control applied}
#' \item{valid}{Logical indicating if all patterns meet disclosure requirements}
#' \item{message}{A message describing the validity status}
#' @author Xavier Escribà montagut for DataSHIELD Development Team
#' @import mice
#' @export
#'
mdPatternDS <- function(x){

  #############################################################
  # MODULE 1: CAPTURE THE nfilter SETTINGS
  thr <- dsBase::listDisclosureSettingsDS()
  nfilter.tab <- as.numeric(thr$nfilter.tab)
  #############################################################

  # Resolve the server-side object named by 'x'; fail with a clear message
  # rather than an opaque eval() error if the object does not exist.
  x.val <- tryCatch(
    {
      eval(parse(text=x), envir = parent.frame())
    },
    error = function(e) {
      stop(paste0("Object '", x, "' does not exist on the server"), call. = FALSE)
    }
  )

  # Only data frames and matrices are meaningful inputs for md.pattern.
  # NOTE: class() of a matrix is c("matrix", "array"), hence the %in% checks.
  typ <- class(x.val)
  if(!("data.frame" %in% typ || "matrix" %in% typ)){
    stop(paste0("The input object must be of type 'data.frame' or 'matrix'. Current type: ",
                paste(typ, collapse = ", ")), call. = FALSE)
  }

  # Compute the missing data pattern matrix without producing a plot.
  # (Pass the resolved value directly instead of reassigning the 'x' parameter.)
  pattern <- mice::md.pattern(x.val, plot = FALSE)

  # Apply disclosure control.
  # Pattern counts are stored in the row names; the last row is the per-variable
  # summary and carries no pattern count.
  validity <- "valid"
  n_patterns <- nrow(pattern) - 1 # exclude the summary row

  if(n_patterns > 0){
    # Pattern counts for the non-summary rows
    pattern_counts <- as.numeric(rownames(pattern)[seq_len(n_patterns)])

    # Patterns whose count is positive but below the disclosure threshold
    invalid_idx <- which(pattern_counts > 0 & pattern_counts < nfilter.tab)

    if(length(invalid_idx) > 0){
      validity <- "invalid"

      # Suppress invalid patterns (vectorized):
      # - rename the row to "suppressed(<N>)"
      # - blank out all pattern values in the row
      rnames <- rownames(pattern)
      rnames[invalid_idx] <- paste0("suppressed(<", nfilter.tab, ")")
      rownames(pattern) <- rnames
      pattern[invalid_idx, ] <- NA

      # Blank the summary row too, so suppressed counts cannot be
      # back-calculated by subtraction from the column totals.
      pattern[nrow(pattern), ] <- NA
    }
  }

  # Scalar message: use if/else rather than ifelse(), which is meant for vectors.
  if(validity == "valid"){
    msg <- "Valid: all pattern counts meet disclosure requirements"
  } else {
    msg <- paste0("Invalid: some pattern counts below threshold (",
                  nfilter.tab, ") have been suppressed")
  }

  # Return the pattern with validity information
  return(list(
    pattern = pattern,
    valid = (validity == "valid"),
    message = msg
  ))
}

#AGGREGATE FUNCTION
# mdPatternDS
14 changes: 8 additions & 6 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ variables:
branchName: $(Build.SourceBranchName)
test_filter: '*'
_r_check_system_clock_: 0
PERF_PROFILE: 'azure-pipeline'



Expand All @@ -46,17 +47,18 @@ resources:
# When and under what condition to run the pipeline.
schedules:
- cron: "0 0 * * 0"
displayName: Weekly build - master
displayName: Weekly build - latest release
branches:
include:
- master
- 6.3.0
- 6.3.4
always: true
- cron: "0 1 * * *"
displayName: Nightly build - v6.3.1-dev
displayName: Nightly build - development branches
branches:
include:
- v6.3.1-dev
- v6.3.5-dev
- v6.4.0-dev
- v7.0.0-dev
always: true

jobs:
Expand Down Expand Up @@ -188,6 +190,7 @@ jobs:
# testthat::testpackage uses a MultiReporter, comprised of a ProgressReporter and JunitReporter
# R output and messages are redirected by sink() to test_console_output.txt
# junit reporter output is to test_results.xml

sudo R -q -e '
library(covr);
write.csv(
Expand Down Expand Up @@ -250,7 +253,6 @@ jobs:
echo 'branch:'$(branchName) >> $(datetime).txt
echo 'os:'$(lsb_release -ds) >> $(datetime).txt
echo 'R:'$(R --version | head -n 1) >> $(datetime).txt
echo 'opal:'$(opal system --opal localhost:8080 --user administrator --password "datashield_test&" --version) >> $(datetime).txt

workingDirectory: $(Pipeline.Workspace)/logs
displayName: 'Write versions to file'
Expand Down
4 changes: 2 additions & 2 deletions docs/404.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions docs/LICENSE.html

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading