{ "cells": [ { "cell_type": "markdown", "id": "ed5cfd16", "metadata": {}, "source": [ "# Hurricane multiverse (non-neuroimaging)\n", "\n", "The Comet toolbox is developed primarily for fMRI brain network analyses. However, we here showcases how the versatile multiverse workflow can also be applied outside of neuroimaging.\n", "\n", "This script is a re-implementation og the famous *“Female hurricanes are deadlier than male hurricanes”* multiverse analysis originally presented in [Simonsohn et al. (2020)](https://www.nature.com/articles/s41562-020-0912-z). Their study revisited [Jung et al. (2014)](https://www.pnas.org/doi/full/10.1073/pnas.1402786111), who reported that hurricanes with more feminine names caused more fatalities. Simonsohn and colleagues systematically explored how robust this conclusion is across a large range of reasonable analytic specifications.\n", "\n", "The Comet toolbox provides a Python-native multiverse framework that allows the entire hurricane multiverse analysis to be expressed with two simple objects:\n", "\n", "- A dictionary of forking paths\n", "- A single analysis template function containing placeholders for all decisions\n", "\n", "Existing examples (R implementations) of this example can be found here:\n", "\n", "- multiverse package: https://cran.r-project.org/web/packages/multiverse/vignettes/example-hurricane.html\n", "- mverse package: https://mverseanalysis.github.io/mverse/index.html" ] }, { "cell_type": "code", "execution_count": 1, "id": "488e043d", "metadata": {}, "outputs": [], "source": [ "from comet.multiverse import Multiverse\n", "\n", "forking_paths = {\n", " \"death_outliers\": [2, 1, 0], # Drop n hurricanes with the most deaths\n", " \"damage_outliers\": [3, 2, 1, 0], # Drop n hurricanes with the most damage\n", "\n", " \"femininity_rating\": [\"fem_likert\", \"fem_binary\"], # Femininity rating (11-point likert or binary)\n", " \"damage_functional\": [\"ln\", \"linear\"], # Damage functional (ln or no transformation)\n", "\n", " \"effect_type\": [\"femininity * damages\",\n", " \"femininity * damages + femininity * zpressure\",\n", " \"femininity * damages + femininity * zwind\",\n", " \"femininity * damages + femininity * zcat\",\n", " \"femininity * damages + femininity * z3\",\n", " \"femininity + damages + z3\"], # Interactions or main effect only)\n", " \"year_interaction\": [\"\", \" + post1979:damages\", \" + year:damages\"], # Interactions with year\n", " \"model\": [\"log_linear\", \"neg_binomial\"] # Model (negative binomial or log-linear)\n", "}\n", "\n", "def analysis_template():\n", " import comet\n", " import numpy as np\n", " import statsmodels.api as sm\n", " import statsmodels.formula.api as smf\n", "\n", " # Load hurricane data\n", " df = comet.utils.load_example(\"hurricane\")\n", "\n", " # Create derived predictors\n", " df[\"post1979\"] = (df[\"elapsed_years\"] > 1979).astype(int)\n", " df[\"zcat\"] = (df[\"category\"] - df[\"category\"].mean()) / df[\"category\"].std()\n", " df[\"zpressure\"] = -((df[\"pressure\"] - df[\"pressure\"].mean()) / df[\"pressure\"].std())\n", " df[\"zwind\"] = (df[\"wind\"] - df[\"wind\"].mean()) / df[\"wind\"].std()\n", " df[\"z3\"] = (df[\"zpressure\"] + df[\"zcat\"] + df[\"zwind\"]) / 3.0\n", "\n", " # Decisions 1 & 2: Exclude outliers\n", " top_deaths = df[\"deaths\"].nlargest({{death_outliers}}).unique()\n", " top_damage = df[\"damages\"].nlargest({{damage_outliers}}).unique()\n", " df = df[~df[\"deaths\"].isin(top_deaths) & ~df[\"damages\"].isin(top_damage)]\n", "\n", " # Decision 2: Choose femininity rating\n", " df[\"femininity\"] = df[{{femininity_rating}}]\n", "\n", " # Decision 3: Transform damage variable\n", " if {{damage_functional}} == \"ln\":\n", " df[\"damages\"] = np.log(df[\"damages\"])\n", "\n", " # Decision 4 & 5: Effect type & account for year\n", " formula = \"deaths ~ \" + {{effect_type}} + {{year_interaction}}\n", "\n", " # Decision 6: Model type and fitting\n", " if {{model}} == \"log_linear\":\n", " df[\"deaths\"] = np.log(df[\"deaths\"] + 1)\n", " fit = smf.ols(formula, data=df).fit()\n", " else:\n", " fit = smf.glm(formula, data=df, family=sm.families.NegativeBinomial(alpha=1)).fit()\n", "\n", " # Results: Set femininity for male vs female and predict deaths at the sample means\n", " if {{femininity_rating}} == \"fem_likert\":\n", " fem_male = df[\"femininity\"][df[\"fem_binary\"] == 0].mean()\n", " fem_fem = df[\"femininity\"][df[\"fem_binary\"] == 1].mean()\n", " else:\n", " fem_male = 0\n", " fem_fem = 1\n", "\n", " base = df.mean(numeric_only=True).to_frame().T\n", "\n", " base_male = base.copy()\n", " base_male[\"femininity\"] = fem_male\n", "\n", " base_fem = base.copy()\n", " base_fem[\"femininity\"] = fem_fem\n", "\n", " if {{model}} == \"log_linear\":\n", " # Predict and back-transform (slightly simplified)\n", " pred_m = np.exp(fit.predict(base_male)) - 1\n", " pred_f = np.exp(fit.predict(base_fem)) - 1\n", " else:\n", " pred_m = fit.predict(base_male)\n", " pred_f = fit.predict(base_fem)\n", "\n", " # Get the additional deaths as multiverse outcome measure\n", " extra_deaths = pred_f.values - pred_m.values\n", " # The p-values mix interaction with main effect only so its a bit hacky, but good enough for illustration\n", " p_val = fit.pvalues[\"femininity:damages\"] if \"femininity:damages\" in fit.pvalues.index else fit.pvalues[\"femininity\"] \n", " \n", " comet.utils.save_universe_results({\"extra_deaths\": extra_deaths, \"p_val\": p_val})" ] }, { "cell_type": "code", "execution_count": 2, "id": "c2925495", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | Universe | \n", "Decision 1 | \n", "Value 1 | \n", "Decision 2 | \n", "Value 2 | \n", "Decision 3 | \n", "Value 3 | \n", "Decision 4 | \n", "Value 4 | \n", "Decision 5 | \n", "Value 5 | \n", "Decision 6 | \n", "Value 6 | \n", "Decision 7 | \n", "Value 7 | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "Universe_1 | \n", "death_outliers | \n", "2 | \n", "damage_outliers | \n", "3 | \n", "femininity_rating | \n", "fem_likert | \n", "damage_functional | \n", "ln | \n", "effect_type | \n", "femininity * damages | \n", "year_interaction | \n", "NaN | \n", "model | \n", "log_linear | \n", "
| 1 | \n", "Universe_2 | \n", "death_outliers | \n", "2 | \n", "damage_outliers | \n", "3 | \n", "femininity_rating | \n", "fem_likert | \n", "damage_functional | \n", "ln | \n", "effect_type | \n", "femininity * damages | \n", "year_interaction | \n", "NaN | \n", "model | \n", "neg_binomial | \n", "
| 2 | \n", "Universe_3 | \n", "death_outliers | \n", "2 | \n", "damage_outliers | \n", "3 | \n", "femininity_rating | \n", "fem_likert | \n", "damage_functional | \n", "ln | \n", "effect_type | \n", "femininity * damages | \n", "year_interaction | \n", "+ post1979:damages | \n", "model | \n", "log_linear | \n", "
| 3 | \n", "Universe_4 | \n", "death_outliers | \n", "2 | \n", "damage_outliers | \n", "3 | \n", "femininity_rating | \n", "fem_likert | \n", "damage_functional | \n", "ln | \n", "effect_type | \n", "femininity * damages | \n", "year_interaction | \n", "+ post1979:damages | \n", "model | \n", "neg_binomial | \n", "
| 4 | \n", "Universe_5 | \n", "death_outliers | \n", "2 | \n", "damage_outliers | \n", "3 | \n", "femininity_rating | \n", "fem_likert | \n", "damage_functional | \n", "ln | \n", "effect_type | \n", "femininity * damages | \n", "year_interaction | \n", "+ year:damages | \n", "model | \n", "log_linear | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 1723 | \n", "Universe_1724 | \n", "death_outliers | \n", "0 | \n", "damage_outliers | \n", "0 | \n", "femininity_rating | \n", "fem_binary | \n", "damage_functional | \n", "linear | \n", "effect_type | \n", "femininity + damages + z3 | \n", "year_interaction | \n", "NaN | \n", "model | \n", "neg_binomial | \n", "
| 1724 | \n", "Universe_1725 | \n", "death_outliers | \n", "0 | \n", "damage_outliers | \n", "0 | \n", "femininity_rating | \n", "fem_binary | \n", "damage_functional | \n", "linear | \n", "effect_type | \n", "femininity + damages + z3 | \n", "year_interaction | \n", "+ post1979:damages | \n", "model | \n", "log_linear | \n", "
| 1725 | \n", "Universe_1726 | \n", "death_outliers | \n", "0 | \n", "damage_outliers | \n", "0 | \n", "femininity_rating | \n", "fem_binary | \n", "damage_functional | \n", "linear | \n", "effect_type | \n", "femininity + damages + z3 | \n", "year_interaction | \n", "+ post1979:damages | \n", "model | \n", "neg_binomial | \n", "
| 1726 | \n", "Universe_1727 | \n", "death_outliers | \n", "0 | \n", "damage_outliers | \n", "0 | \n", "femininity_rating | \n", "fem_binary | \n", "damage_functional | \n", "linear | \n", "effect_type | \n", "femininity + damages + z3 | \n", "year_interaction | \n", "+ year:damages | \n", "model | \n", "log_linear | \n", "
| 1727 | \n", "Universe_1728 | \n", "death_outliers | \n", "0 | \n", "damage_outliers | \n", "0 | \n", "femininity_rating | \n", "fem_binary | \n", "damage_functional | \n", "linear | \n", "effect_type | \n", "femininity + damages + z3 | \n", "year_interaction | \n", "+ year:damages | \n", "model | \n", "neg_binomial | \n", "
1728 rows × 15 columns
\n", "