From b6a31f61206a2f3a98e7cde1ee1f3fd471edae8d Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Mon, 30 Mar 2026 17:21:23 +0000
Subject: [PATCH] fix: remove numpy/scipy symbol re-exports from datasets
 module (issue #981)

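Remove bare 'from numpy.random import choice' and replace the three call
sites (in convert_to_binary, stochastically_convert_to_three_level_categorical
and create_discrete_column) with 'np.random.choice(...)'. This stops the
numpy 'choice' symbol from appearing as part of the public API of
dowhy.datasets.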

Remove bare 'from scipy.stats import bernoulli, halfnorm, poisson, uniform'
and replace all call sites in sales_dataset() with 'ss.<name>.rvs(...)',
using the existing 'import scipy.stats as ss' alias. This stops four
scipy distribution objects from polluting the module namespace and confusing
Sphinx source-link generation.

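No change to the behaviour of any function: the qualified names resolve to
the same numpy/scipy objects as before. The only visible difference is that
'choice', 'bernoulli', 'halfnorm', 'poisson' and 'uniform' can no longer be
imported from dowhy.datasets.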

Closes #981

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 dowhy/datasets.py | 30 ++++++++++++++----------------
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/dowhy/datasets.py b/dowhy/datasets.py
index 6d92167864..9be4e3ebf2 100755
--- a/dowhy/datasets.py
+++ b/dowhy/datasets.py
@@ -7,8 +7,6 @@
 import numpy as np
 import pandas as pd
 import scipy.stats as ss
-from numpy.random import choice
-from scipy.stats import bernoulli, halfnorm, poisson, uniform
 from sklearn.neural_network import MLPRegressor
 
 from dowhy.utils.graph_operations import add_edge, del_edge, get_random_node_pair, get_simple_ordered_tree
@@ -25,14 +23,14 @@ def sigmoid(x):
 def convert_to_binary(x, stochastic=True):
     p = sigmoid(x)
     if stochastic:
-        return choice([0, 1], p=[1 - p, p])
+        return np.random.choice([0, 1], p=[1 - p, p])
     else:
         return int(p > 0.5)
 
 
 def stochastically_convert_to_three_level_categorical(x):
     p = sigmoid(x)
-    return choice([0, 1, 2], p=[0.8 * (1 - p), 0.8 * p, 0.2])
+    return np.random.choice([0, 1, 2], p=[0.8 * (1 - p), 0.8 * p, 0.2])
 
 
 def convert_to_categorical(arr, num_vars, num_discrete_vars, quantiles=[0.25, 0.5, 0.75], one_hot_encode=False):
@@ -590,7 +588,7 @@ def create_discrete_column(num_samples, std_dev=1):
         xL, scale=std_dev
     )  # probability of selecting a number x is p(x-0.5 < x < x+0.5) where x is a normal random variable with mean 0 and standard deviation std_dev
     prob = prob / prob.sum()  # normalize the probabilities so their sum is 1
-    nums = choice(a=x, size=num_samples, p=prob)  # pick up an element
+    nums = np.random.choice(a=x, size=num_samples, p=prob)  # pick up an element
     return nums
 
 
@@ -1099,22 +1097,22 @@ def sales_dataset(
 
     df[ad_spend_col] = (
         based_ad_spending
-        + df[shopping_event_col] * uniform.rvs(loc=1000, scale=1000, size=df.shape[0])
-        + (1 - df[shopping_event_col]) * uniform.rvs(loc=100, scale=400, size=df.shape[0])
+        + df[shopping_event_col] * ss.uniform.rvs(loc=1000, scale=1000, size=df.shape[0])
+        + (1 - df[shopping_event_col]) * ss.uniform.rvs(loc=100, scale=400, size=df.shape[0])
     )
 
     df[page_visit_col] = (
-        poisson.rvs(mu=10000 * page_visitor_factor, size=df.shape[0])
-        + uniform.rvs(loc=5000 * page_visitor_factor, scale=5000, size=df.shape[0]) * df[shopping_event_col]
-        + halfnorm.rvs(loc=0.5 * page_visitor_factor, scale=0.01, size=df.shape[0]) * df[ad_spend_col]
-        + halfnorm.rvs(loc=1000 * page_visitor_factor, scale=100, size=df.shape[0])
+        ss.poisson.rvs(mu=10000 * page_visitor_factor, size=df.shape[0])
+        + ss.uniform.rvs(loc=5000 * page_visitor_factor, scale=5000, size=df.shape[0]) * df[shopping_event_col]
+        + ss.halfnorm.rvs(loc=0.5 * page_visitor_factor, scale=0.01, size=df.shape[0]) * df[ad_spend_col]
+        + ss.halfnorm.rvs(loc=1000 * page_visitor_factor, scale=100, size=df.shape[0])
     )
     df[page_visit_col] = df[page_visit_col].astype(int)
 
     df[price_col] = (
         base_price
-        + uniform.rvs(loc=-200, scale=200, size=df.shape[0]) * df[shopping_event_col]
-        + bernoulli.rvs(p=0.02, size=df.shape[0]) * uniform.rvs(loc=-20, scale=20, size=df.shape[0])
+        + ss.uniform.rvs(loc=-200, scale=200, size=df.shape[0]) * df[shopping_event_col]
+        + ss.bernoulli.rvs(p=0.02, size=df.shape[0]) * ss.uniform.rvs(loc=-20, scale=20, size=df.shape[0])
     )
 
     price_changes = 1 - df[price_col] / original_product_price
@@ -1123,9 +1121,9 @@ def sales_dataset(
     price_changes[price_changes == 0] = 1
 
     df[units_sold_col] = [
-        poisson.rvs(
+        ss.poisson.rvs(
             mu=demand_changes.iloc[i] / price_changes.iloc[i] * 0.2 * df[page_visit_col].iloc[i]
-            + uniform.rvs(loc=100, scale=1000) * df[shopping_event_col].iloc[i]
+            + ss.uniform.rvs(loc=100, scale=1000) * df[shopping_event_col].iloc[i]
         )
         for i in range(df.shape[0])
     ]
@@ -1136,7 +1134,7 @@ def sales_dataset(
     df[operation_col] = (
         df[ad_spend_col]
         + product_production_cost * df[units_sold_col]
-        + halfnorm.rvs(loc=500000, scale=10, size=df.shape[0])
+        + ss.halfnorm.rvs(loc=500000, scale=10, size=df.shape[0])
     )
 
     df[profit_col] = df[revenue_col] - df[operation_col]