2 changes: 1 addition & 1 deletion .github/workflows/autoformat.yml
@@ -13,7 +13,7 @@ jobs:
token: ${{ secrets.GITHUB_TOKEN }}

- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
python-version: '3.11'

2 changes: 1 addition & 1 deletion .github/workflows/publish.yml
@@ -22,7 +22,7 @@ jobs:
steps:
- uses: actions/checkout@v6

- uses: actions/setup-python@v5
- uses: actions/setup-python@v6
with:
python-version: "3.x"

4 changes: 2 additions & 2 deletions .github/workflows/python-app.yml
@@ -16,13 +16,13 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
python-version: ["3.11", "3.12", "3.13", "3.14"]

steps:
- uses: actions/checkout@v6

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python-version }}

2 changes: 1 addition & 1 deletion docs/conf.py
@@ -12,7 +12,7 @@

version = importlib.metadata.version("pySEQTarget")
if not version:
version = "0.12.0"
version = "0.12.1"
sys.path.insert(0, os.path.abspath("../"))

project = "pySEQTarget"
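The version fallback above reads the installed package metadata and only falls back to the pinned string when that lookup comes back empty. A minimal self-contained sketch of the same pattern (the try/except wrapper is an addition here so the snippet runs even where pySEQTarget is not installed; everything else mirrors the diff):

```python
# Sketch of the docs/conf.py version handling touched by this diff.
import importlib.metadata

try:
    version = importlib.metadata.version("pySEQTarget")
except importlib.metadata.PackageNotFoundError:  # not installed in this environment
    version = ""

if not version:
    version = "0.12.1"  # pinned fallback bumped by this diff

print(version)
```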
23 changes: 16 additions & 7 deletions pySEQTarget/SEQopts.py
@@ -19,7 +19,8 @@ class SEQopts:
:type bootstrap_CI_method: str
:param cense_colname: Column name for censoring effect (LTFU, etc.)
:type cense_colname: str
:param cense_denominator: Override to specify denominator patsy formula for censoring models; "1" or "" indicate intercept only model
:param cense_denominator: Override to specify denominator patsy formula for
censoring models; "1" or "" indicate intercept only model
:type cense_denominator: Optional[str] or None
:param cense_numerator: Override to specify numerator patsy formula for censoring models
:type cense_numerator: Optional[str] or None
@@ -55,7 +56,8 @@ class SEQopts:
:type km_curves: bool
:param ncores: Number of cores to use if running in parallel
:type ncores: int
:param numerator: Override to specify the outcome patsy formula for numerator models; "1" or "" indicate intercept only model
:param numerator: Override to specify the outcome patsy formula for
numerator models; "1" or "" indicate intercept only model
:type numerator: str
:param offload: Boolean to offload intermediate model data to disk
:type offload: bool
@@ -87,7 +89,8 @@ class SEQopts:
:type trial_include: bool
:param visit_colname: Column name specifying visit number
:type visit_colname: str
:param weight_eligible_colnames: List of column names of length treatment_level to identify which rows are eligible for weight fitting
:param weight_eligible_colnames: List of column names of length
treatment_level to identify which rows are eligible for weight fitting
:type weight_eligible_colnames: List[str]
:param weight_fit_method: The fitting method to be used ["newton", "bfgs", "lbfgs", "nm"], default "newton"
:type weight_fit_method: str
@@ -155,7 +158,7 @@ class SEQopts:
weight_preexpansion: bool = False
weighted: bool = False

def __post_init__(self):
def _validate_bools(self):
bools = [
"excused",
"followup_class",
@@ -176,27 +179,27 @@ def __post_init__(self):
if not isinstance(getattr(self, i), bool):
raise TypeError(f"{i} must be a boolean value.")

def _validate_ranges(self):
if not isinstance(self.bootstrap_nboot, int) or self.bootstrap_nboot < 0:
raise ValueError("bootstrap_nboot must be a positive integer.")

if self.ncores < 1 or not isinstance(self.ncores, int):
raise ValueError("ncores must be a positive integer.")

if not (0.0 <= self.bootstrap_sample <= 1.0):
raise ValueError("bootstrap_sample must be between 0 and 1.")
if not (0.0 < self.bootstrap_CI < 1.0):
raise ValueError("bootstrap_CI must be between 0 and 1.")
if not (0.0 <= self.selection_sample <= 1.0):
raise ValueError("selection_sample must be between 0 and 1.")

def _validate_choices(self):
if self.plot_type not in ["risk", "survival", "incidence"]:
raise ValueError(
"plot_type must be either 'risk', 'survival', or 'incidence'."
)

if self.bootstrap_CI_method not in ["se", "percentile"]:
raise ValueError("bootstrap_CI_method must be one of 'se' or 'percentile'")

def _normalize_formulas(self):
for i in (
"covariates",
"numerator",
@@ -208,5 +211,11 @@ def __post_init__(self):
if attr is not None and not isinstance(attr, list):
setattr(self, i, "".join(attr.split()))

def __post_init__(self):
self._validate_bools()
self._validate_ranges()
self._validate_choices()
self._normalize_formulas()

if self.offload:
os.makedirs(self.offload_dir, exist_ok=True)
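
The net effect of this hunk is that `__post_init__` now only orchestrates: each family of checks lives in its own small, separately testable helper. A minimal, generic sketch of that pattern (hypothetical `Opts` class and fields, not the real `SEQopts`):

```python
# Generic sketch of the __post_init__-delegation pattern adopted in SEQopts.
from dataclasses import dataclass


@dataclass
class Opts:
    weighted: bool = False
    ncores: int = 1
    bootstrap_CI: float = 0.95

    def _validate_bools(self):
        if not isinstance(self.weighted, bool):
            raise TypeError("weighted must be a boolean value.")

    def _validate_ranges(self):
        if not isinstance(self.ncores, int) or self.ncores < 1:
            raise ValueError("ncores must be a positive integer.")
        if not (0.0 < self.bootstrap_CI < 1.0):
            raise ValueError("bootstrap_CI must be between 0 and 1.")

    def __post_init__(self):
        # Orchestrate only; the checks themselves live in the helpers above.
        self._validate_bools()
        self._validate_ranges()


Opts(ncores=2)    # passes validation
# Opts(ncores=0)  # would raise ValueError
```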
4 changes: 3 additions & 1 deletion pySEQTarget/SEQoutput.py
@@ -102,7 +102,9 @@ def retrieve_data(
) -> pl.DataFrame:
"""
Getter for data stored within ``SEQoutput``
:param type: Data which you would like to access, ['km_data', 'hazard', 'risk_ratio', 'risk_difference', 'unique_outcomes', 'nonunique_outcomes', 'unique_switches', 'nonunique_switches']
:param type: Data which you would like to access, ['km_data', 'hazard',
'risk_ratio', 'risk_difference', 'unique_outcomes',
'nonunique_outcomes', 'unique_switches', 'nonunique_switches']
:type type: str
"""
match type:
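For orientation, the getter is keyed by the strings listed in the docstring and returns a polars DataFrame. A small illustrative helper (the `output` argument is assumed to be an already-fitted `SEQoutput`; constructing one is outside this diff):

```python
import polars as pl  # retrieve_data returns a pl.DataFrame per the signature above


def summarize(output) -> None:
    """Print the shape of a few stored tables; `output` is a fitted SEQoutput."""
    for kind in ("km_data", "risk_ratio", "risk_difference"):
        table: pl.DataFrame = output.retrieve_data(kind)
        print(kind, table.shape)
```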
28 changes: 19 additions & 9 deletions pySEQTarget/analysis/__init__.py
@@ -1,9 +1,19 @@
from ._hazard import _calculate_hazard as _calculate_hazard
from ._outcome_fit import _outcome_fit as _outcome_fit
from ._risk_estimates import _risk_estimates as _risk_estimates
from ._subgroup_fit import _subgroup_fit as _subgroup_fit
from ._survival_pred import _calculate_survival as _calculate_survival
from ._survival_pred import _clamp as _clamp
from ._survival_pred import \
_get_outcome_predictions as _get_outcome_predictions
from ._survival_pred import _pred_risk as _pred_risk
from ._hazard import _calculate_hazard
from ._outcome_fit import _outcome_fit
from ._risk_estimates import _risk_estimates
from ._subgroup_fit import _subgroup_fit
from ._survival_pred import _calculate_survival
from ._survival_pred import _clamp
from ._survival_pred import _get_outcome_predictions
from ._survival_pred import _pred_risk

__all__ = [
"_calculate_hazard",
"_outcome_fit",
"_risk_estimates",
"_subgroup_fit",
"_calculate_survival",
"_clamp",
"_get_outcome_predictions",
"_pred_risk",
]
36 changes: 18 additions & 18 deletions pySEQTarget/analysis/_hazard.py
@@ -24,13 +24,13 @@ def _calculate_hazard(self):


def _calculate_hazard_single(self, data, idx=None, val=None):
full_hr = _hazard_handler(self, data, idx, 0, self._rng)
full_log_hr = _hazard_handler(self, data, idx, 0, self._rng)

if full_hr is None or np.isnan(full_hr):
if full_log_hr is None or np.isnan(full_log_hr):
return _create_hazard_output(None, None, None, val, self)

if self.bootstrap_nboot > 0:
boot_hrs = []
boot_log_hrs = []

for boot_idx in range(len(self._boot_samples)):
id_counts = self._boot_samples[boot_idx]
@@ -43,27 +43,27 @@ def _calculate_hazard_single(self, data, idx=None, val=None):

boot_data = pl.concat(boot_data_list)

boot_hr = _hazard_handler(self, boot_data, idx, boot_idx + 1, self._rng)
if boot_hr is not None and not np.isnan(boot_hr):
boot_hrs.append(boot_hr)
boot_log_hr = _hazard_handler(self, boot_data, idx, boot_idx + 1, self._rng)
if boot_log_hr is not None and not np.isnan(boot_log_hr):
boot_log_hrs.append(boot_log_hr)

if len(boot_hrs) == 0:
return _create_hazard_output(full_hr, None, None, val, self)
if len(boot_log_hrs) == 0:
return _create_hazard_output(np.exp(full_log_hr), None, None, val, self)

if self.bootstrap_CI_method == "se":
from scipy.stats import norm

z = norm.ppf(1 - (1 - self.bootstrap_CI) / 2)
se = np.std(boot_hrs)
lci = full_hr - z * se
uci = full_hr + z * se
se = np.std(boot_log_hrs)
lci = np.exp(full_log_hr - z * se)
uci = np.exp(full_log_hr + z * se)
else:
lci = np.quantile(boot_hrs, (1 - self.bootstrap_CI) / 2)
uci = np.quantile(boot_hrs, 1 - (1 - self.bootstrap_CI) / 2)
lci = np.exp(np.quantile(boot_log_hrs, (1 - self.bootstrap_CI) / 2))
uci = np.exp(np.quantile(boot_log_hrs, 1 - (1 - self.bootstrap_CI) / 2))
else:
lci, uci = None, None

return _create_hazard_output(full_hr, lci, uci, val, self)
return _create_hazard_output(np.exp(full_log_hr), lci, uci, val, self)


def _hazard_handler(self, data, idx, boot_idx, rng):
@@ -191,8 +191,8 @@ def _hazard_handler(self, data, idx, boot_idx, rng):
formula=f"`{self.treatment_col}{self.indicator_baseline}`",
)

hr = np.exp(cph.params_.values[0])
return hr
log_hr = cph.params_.values[0]
return log_hr
except Exception as e:
print(f"Cox model fitting failed: {e}")
return None
@@ -202,13 +202,13 @@ def _create_hazard_output(hr, lci, uci, val, self):
if lci is not None and uci is not None:
output = pl.DataFrame(
{
"Hazard": [hr if hr is not None else float("nan")],
"Hazard ratio": [hr if hr is not None else float("nan")],
"LCI": [lci],
"UCI": [uci],
}
)
else:
output = pl.DataFrame({"Hazard": [hr if hr is not None else float("nan")]})
output = pl.DataFrame({"Hazard ratio": [hr if hr is not None else float("nan")]})

if val is not None:
output = output.with_columns(pl.lit(val).alias(self.subgroup_colname))
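The substantive change in this file is that bootstrap confidence intervals are now built on the log hazard-ratio scale and exponentiated at the end (and the output column is relabeled "Hazard ratio"). A numerical sketch of the two interval methods, with made-up bootstrap values standing in for the refits:

```python
# Sketch of the CI construction on the log-HR scale, as introduced in this diff;
# the variable names and toy bootstrap values are illustrative only.
import numpy as np
from scipy.stats import norm

full_log_hr = np.log(0.72)                       # point estimate from the full data
boot_log_hrs = np.log([0.65, 0.70, 0.74, 0.80])  # log-HRs from bootstrap refits
ci = 0.95

# SE-based interval: symmetric on the log scale, exponentiated afterwards.
z = norm.ppf(1 - (1 - ci) / 2)
se = np.std(boot_log_hrs)
se_ci = (np.exp(full_log_hr - z * se), np.exp(full_log_hr + z * se))

# Percentile interval: quantiles of the bootstrap log-HRs, then exponentiated.
pct_ci = (
    np.exp(np.quantile(boot_log_hrs, (1 - ci) / 2)),
    np.exp(np.quantile(boot_log_hrs, 1 - (1 - ci) / 2)),
)

print(np.exp(full_log_hr), se_ci, pct_ci)
```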
73 changes: 40 additions & 33 deletions pySEQTarget/analysis/_outcome_fit.py
@@ -5,6 +5,44 @@
import statsmodels.formula.api as smf


def _apply_spline_formula(formula, indicator_squared):
spline = "cr(followup, df=3)"

formula = re.sub(r"(\w+)\s*\*\s*followup\b", rf"\1*{spline}", formula)
formula = re.sub(r"\bfollowup\s*\*\s*(\w+)", rf"{spline}*\1", formula)
formula = re.sub(
rf"\bfollowup{re.escape(indicator_squared)}\b", "", formula
)
formula = re.sub(r"\bfollowup\b", "", formula)

formula = re.sub(r"\s+", " ", formula)
formula = re.sub(r"\+\s*\+", "+", formula)
formula = re.sub(r"^\s*\+\s*|\s*\+\s*$", "", formula).strip()

if formula:
return f"{formula} + I({spline}**2)"
return f"I({spline}**2)"


def _cast_categories(self, df_pd):
df_pd[self.treatment_col] = df_pd[self.treatment_col].astype("category")
tx_bas = f"{self.treatment_col}{self.indicator_baseline}"
df_pd[tx_bas] = df_pd[tx_bas].astype("category")

if self.followup_class and not self.followup_spline:
df_pd["followup"] = df_pd["followup"].astype("category")
squared_col = f"followup{self.indicator_squared}"
if squared_col in df_pd.columns:
df_pd[squared_col] = df_pd[squared_col].astype("category")

if self.fixed_cols:
for col in self.fixed_cols:
if col in df_pd.columns:
df_pd[col] = df_pd[col].astype("category")

return df_pd


def _outcome_fit(
self,
df: pl.DataFrame,
@@ -23,41 +61,10 @@ def _outcome_fit(
if self.method == "censoring":
df = df.filter(pl.col("switch") != 1)

df_pd = df.to_pandas()

df_pd[self.treatment_col] = df_pd[self.treatment_col].astype("category")
tx_bas = f"{self.treatment_col}{self.indicator_baseline}"
df_pd[tx_bas] = df_pd[tx_bas].astype("category")

if self.followup_class and not self.followup_spline:
df_pd["followup"] = df_pd["followup"].astype("category")
squared_col = f"followup{self.indicator_squared}"
if squared_col in df_pd.columns:
df_pd[squared_col] = df_pd[squared_col].astype("category")
df_pd = _cast_categories(self, df.to_pandas())

if self.followup_spline:
spline = "cr(followup, df=3)"

formula = re.sub(r"(\w+)\s*\*\s*followup\b", rf"\1*{spline}", formula)
formula = re.sub(r"\bfollowup\s*\*\s*(\w+)", rf"{spline}*\1", formula)
formula = re.sub(
rf"\bfollowup{re.escape(self.indicator_squared)}\b", "", formula
)
formula = re.sub(r"\bfollowup\b", "", formula)

formula = re.sub(r"\s+", " ", formula)
formula = re.sub(r"\+\s*\+", "+", formula)
formula = re.sub(r"^\s*\+\s*|\s*\+\s*$", "", formula).strip()

if formula:
formula = f"{formula} + I({spline}**2)"
else:
formula = f"I({spline}**2)"

if self.fixed_cols:
for col in self.fixed_cols:
if col in df_pd.columns:
df_pd[col] = df_pd[col].astype("category")
formula = _apply_spline_formula(formula, self.indicator_squared)

full_formula = f"{outcome} ~ {formula}"

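The spline rewriting that used to live inline in `_outcome_fit` is now the standalone `_apply_spline_formula` helper, which swaps raw `followup` terms for a cubic-regression-spline term. An illustrative call (the covariate formula is made up; the import path assumes the module layout shown in this diff, and the expected output follows from tracing the regex rules above):

```python
# Hypothetical input formula run through the new helper.
from pySEQTarget.analysis._outcome_fit import _apply_spline_formula

formula = "age + sex + followup + followup_sq"
print(_apply_spline_formula(formula, "_sq"))
# Expected: "age + sex + I(cr(followup, df=3)**2)"
```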
9 changes: 7 additions & 2 deletions pySEQTarget/error/__init__.py
@@ -1,2 +1,7 @@
from ._data_checker import _data_checker as _data_checker
from ._param_checker import _param_checker as _param_checker
from ._data_checker import _data_checker
from ._param_checker import _param_checker

__all__ = [
"_data_checker",
"_param_checker",
]
18 changes: 13 additions & 5 deletions pySEQTarget/expansion/__init__.py
@@ -1,5 +1,13 @@
from ._binder import _binder as _binder
from ._diagnostics import _diagnostics as _diagnostics
from ._dynamic import _dynamic as _dynamic
from ._mapper import _mapper as _mapper
from ._selection import _random_selection as _random_selection
from ._binder import _binder
from ._diagnostics import _diagnostics
from ._dynamic import _dynamic
from ._mapper import _mapper
from ._selection import _random_selection

__all__ = [
"_binder",
"_diagnostics",
"_dynamic",
"_mapper",
"_random_selection",
]
30 changes: 21 additions & 9 deletions pySEQTarget/helpers/__init__.py
@@ -1,9 +1,21 @@
from ._bootstrap import bootstrap_loop as bootstrap_loop
from ._col_string import _col_string as _col_string
from ._format_time import _format_time as _format_time
from ._offloader import Offloader as Offloader
from ._output_files import _build_md as _build_md
from ._output_files import _build_pdf as _build_pdf
from ._pad import _pad as _pad
from ._predict_model import _predict_model as _predict_model
from ._prepare_data import _prepare_data as _prepare_data
from ._bootstrap import bootstrap_loop
from ._col_string import _col_string
from ._format_time import _format_time
from ._offloader import Offloader
from ._output_files import _build_md
from ._output_files import _build_pdf
from ._pad import _pad
from ._predict_model import _predict_model
from ._prepare_data import _prepare_data

__all__ = [
"bootstrap_loop",
"_col_string",
"_format_time",
"Offloader",
"_build_md",
"_build_pdf",
"_pad",
"_predict_model",
"_prepare_data",
]