diff --git a/cvxpy/atoms/affine/binary_operators.py b/cvxpy/atoms/affine/binary_operators.py
index 9fa898661a..d54f725699 100644
--- a/cvxpy/atoms/affine/binary_operators.py
+++ b/cvxpy/atoms/affine/binary_operators.py
@@ -190,6 +190,9 @@ def _grad(self, values):
         X = values[0]
         Y = values[1]
 
+        if np.isscalar(X) or np.isscalar(Y):
+            return [sp.csc_matrix(Y), sp.csc_matrix(X)]
+
         # Get dimensions
         m, n = self.args[0].shape if len(self.args[0].shape) == 2 else (self.args[0].size, 1)
         n2, p = self.args[1].shape if len(self.args[1].shape) == 2 else (self.args[1].size, 1)
@@ -198,7 +201,7 @@ def _grad(self, values):
         assert n == n2, f"Inner dimensions must match for multiplication: {n} != {n2}"
 
         # Compute ∂vec(Z)/∂vec(X) = (Y.T ⊗ I_m).T
-        # This is a (m*n) × (m*p) matrix
+        # This is a (m*n) × (m*p) matrix
         DX = sp.kron(Y.T, sp.eye(m, format='csc'), format='csc').T
 
         # Compute ∂vec(Z)/∂vec(Y) = (I_p ⊗ X).T
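Side note on the Jacobian identity this hunk relies on: with column-major (Fortran-order) vectorization, vec(XY) = (Yᵀ ⊗ I_m) vec(X), so ∂vec(Z)/∂vec(X) = Yᵀ ⊗ I_m, and the patch stores its transpose, the (m*n) × (m*p) matrix named in the comment (which appears to match CVXPY's convention of gradients shaped input-size × output-size). A minimal standalone NumPy check of the identity (not part of the patch):

import numpy as np

m, n, p = 3, 4, 2
X, Y = np.random.randn(m, n), np.random.randn(n, p)
vec = lambda A: A.reshape(-1, order='F')  # column-major vectorization

# vec(XY) == (Y.T kron I_m) @ vec(X)
assert np.allclose(vec(X @ Y), np.kron(Y.T, np.eye(m)) @ vec(X))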
diff --git a/cvxpy/reductions/expr2smooth/canonicalizers/div_canon.py b/cvxpy/reductions/expr2smooth/canonicalizers/div_canon.py
index 8bcf5d17c6..16b1f57ee7 100644
--- a/cvxpy/reductions/expr2smooth/canonicalizers/div_canon.py
+++ b/cvxpy/reductions/expr2smooth/canonicalizers/div_canon.py
@@ -14,8 +14,10 @@
 limitations under the License.
 """
 
+from cvxpy.atoms.affine.binary_operators import multiply
 from cvxpy.expressions.variable import Variable
 
+
 # We canonicalize div(x, y) as z * y = x.
 def div_canon(expr, args):
     # TODO: potential bounds here?
@@ -33,4 +35,4 @@ def div_canon(expr, args):
     else:
         z.value = expr.point_in_domain()
     # TODO: should we also include y >= 0 here?
-    return z, [z * y == args[0], y == args[1]]#], #y >= 0, z >= 0]
\ No newline at end of file
+    return z, [multiply(z, y) == args[0], y == args[1]]#, y >= 0, z >= 0]
diff --git a/cvxpy/reductions/expr2smooth/canonicalizers/log_canon.py b/cvxpy/reductions/expr2smooth/canonicalizers/log_canon.py
index 31a64b00e3..dbb27dd7a2 100644
--- a/cvxpy/reductions/expr2smooth/canonicalizers/log_canon.py
+++ b/cvxpy/reductions/expr2smooth/canonicalizers/log_canon.py
@@ -18,7 +18,7 @@
 
 
 def log_canon(expr, args):
-    t = Variable(args[0].size)
+    t = Variable(args[0].shape)
     if args[0].value is not None:
         t.value = args[0].value
     else:
diff --git a/cvxpy/reductions/expr2smooth/canonicalizers/pnorm_canon.py b/cvxpy/reductions/expr2smooth/canonicalizers/pnorm_canon.py
index 21ff64697d..977c2fa935 100644
--- a/cvxpy/reductions/expr2smooth/canonicalizers/pnorm_canon.py
+++ b/cvxpy/reductions/expr2smooth/canonicalizers/pnorm_canon.py
@@ -19,8 +19,7 @@
 
 def pnorm_canon(expr, args):
     x = args[0]
-    p = expr.p_rational
-    w = expr.w
+    p = expr.p
 
     if p == 1:
         return x, []
@@ -28,8 +27,7 @@ def pnorm_canon(expr, args):
     shape = expr.shape
     t = Variable(shape)
     if p % 2 == 0:
-        summation = [x[i]**p for i in range(len(x))]
+        summation = sum(x[i]**p for i in range(x.size))
         return t, [t**p == summation, t >= 0]
     else:
         z = Variable(shape)
-
\ No newline at end of file
diff --git a/cvxpy/reductions/expr2smooth/canonicalizers/trig_canon.py b/cvxpy/reductions/expr2smooth/canonicalizers/trig_canon.py
new file mode 100644
index 0000000000..7bcce5b938
--- /dev/null
+++ b/cvxpy/reductions/expr2smooth/canonicalizers/trig_canon.py
@@ -0,0 +1,41 @@
+"""
+Copyright 2025 CVXPY developers
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from cvxpy.expressions.variable import Variable
+
+
+def cos_canon(expr, args):
+    if type(args[0]) is not Variable:
+        t = Variable(args[0].shape)
+        t.value = expr.point_in_domain()
+        return expr.copy([t]), [t == args[0]]
+    return expr, []
+
+
+def sin_canon(expr, args):
+    if type(args[0]) is not Variable:
+        t = Variable(args[0].shape)
+        t.value = expr.point_in_domain()
+        return expr.copy([t]), [t == args[0]]
+    return expr, []
+
+
+def tan_canon(expr, args):
+    if type(args[0]) is not Variable:
+        t = Variable(args[0].shape)
+        t.value = expr.point_in_domain()
+        return expr.copy([t]), [t == args[0]]
+    return expr, []
diff --git a/cvxpy/reductions/solvers/nlp_solvers/ipopt_nlpif.py b/cvxpy/reductions/solvers/nlp_solvers/ipopt_nlpif.py
index 2aa8acedb4..c394ad1145 100644
--- a/cvxpy/reductions/solvers/nlp_solvers/ipopt_nlpif.py
+++ b/cvxpy/reductions/solvers/nlp_solvers/ipopt_nlpif.py
@@ -261,7 +261,7 @@ def jacobianstructure(self):
                 #var.value = self.initial_point[offset]
                 var.value = np.nan
             else:
-                var.value = np.nan * np.ones(var.size)
+                var.value = (np.nan * np.ones(var.size)).reshape(var.shape, order='F')
                 #var.value = np.atleast_1d(self.initial_point[offset:offset + var.size])
                 #offset += var.size
         rows, cols = [], []
diff --git a/cvxpy/sandbox/logistic.py b/cvxpy/sandbox/logistic.py
new file mode 100644
index 0000000000..112f174422
--- /dev/null
+++ b/cvxpy/sandbox/logistic.py
@@ -0,0 +1,50 @@
+import cvxpy as cp
+import numpy as np
+
+# Generate synthetic data
+np.random.seed(42)
+n_samples = 1000
+n_features = 50
+
+# Create two classes
+X_class0 = np.random.randn(n_samples // 2, n_features) - 1
+X_class1 = np.random.randn(n_samples // 2, n_features) + 1
+X = np.vstack([X_class0, X_class1])
+
+# Labels: -1 for class 0, +1 for class 1
+y = np.concatenate([-np.ones(n_samples // 2), np.ones(n_samples // 2)])
+
+# Add intercept term (bias)
+X_with_intercept = np.column_stack([np.ones(n_samples), X])
+
+# Define CVXPY variables
+n_features_with_intercept = n_features + 1
+w = cp.Variable(n_features_with_intercept)  # weights including bias
+
+# Regularization parameter
+lambda_reg = 0.1
+
+# Logistic regression objective: minimize negative log-likelihood + L2 regularization
+# Using log-sum-exp formulation for numerical stability
+neg_log_likelihood = cp.sum(cp.logistic(-cp.multiply(y, X_with_intercept @ w)))
+regularization = lambda_reg * cp.norm(w[1:], 2)**2  # Don't regularize intercept
+
+# Define the optimization problem
+objective = cp.Minimize(neg_log_likelihood + regularization)
+problem = cp.Problem(objective)
+
+# Solve the problem
+problem.solve(solver=cp.IPOPT, nlp=True, verbose=True)
+
+# Print results
+print(f"Optimization status: {problem.status}")
+print(f"Optimal objective value: {problem.value:.4f}")
+print("Optimal weights (including bias):")
+print(f"  Bias (intercept): {w.value[0]:.4f}")
+for i in range(n_features):
+    print(f"  Weight {i+1}: {w.value[i+1]:.4f}")
+
+# Make predictions on training data
+predictions = np.sign(X_with_intercept @ w.value)
+accuracy = np.mean(predictions == y)
+print(f"\nTraining accuracy: {accuracy:.2%}")
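The logistic.py script leans on cp.logistic(u) = log(1 + e^u), which the script's comment says is used in log-sum-exp form for numerical stability; the naive formula overflows for large u. A quick NumPy illustration of why that matters (np.logaddexp(0, u) is the stable equivalent):

import numpy as np

u = np.array([-50.0, 0.0, 50.0, 800.0])
stable = np.logaddexp(0, u)        # log(1 + e^u) without overflow
with np.errstate(over='ignore'):
    naive = np.log(1 + np.exp(u))  # exp(800) overflows, giving inf
print(stable)  # [~0.0, 0.693..., 50.0, 800.0]
print(naive)   # [~0.0, 0.693..., 50.0, inf]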
diff --git a/cvxpy/sandbox/mle-canon.py b/cvxpy/sandbox/mle-canon.py
new file mode 100644
index 0000000000..d2c46de361
--- /dev/null
+++ b/cvxpy/sandbox/mle-canon.py
@@ -0,0 +1,39 @@
+import numpy as np
+
+import cvxpy as cp
+
+n = 2
+np.random.seed(1234)
+data = np.random.randn(n)
+
+mu = cp.Variable(1, name="mu")
+mu.value = np.array([0.0])
+sigma = cp.Variable(1, name="sigma")
+sigma.value = np.array([1.0])
+t1 = cp.Variable(1, name="t1")
+t2 = cp.Variable(n, name="t2")
+t3 = cp.Variable(1, name="t3")
+t4 = cp.Variable(1, name="t4")
+t5 = cp.Variable(1, name="t5")
+
+log_likelihood = ((n / 2) * cp.log(t4) - cp.sum(cp.square(t2)) / (2 * (t1)**2))
+
+constraints = [
+    t1 == sigma,
+    t2 == data - mu,
+    t3 == (2 * np.pi * (t5)**2),
+    t4 == 1 / t3,
+    t5 == sigma,
+]
+t1.value = sigma.value
+t2.value = data - mu.value
+t3.value = (2 * np.pi * (sigma.value)**2)
+t4.value = 1 / t3.value
+t5.value = sigma.value
+
+objective = cp.Maximize(log_likelihood)
+problem = cp.Problem(objective, constraints)
+problem.solve(solver=cp.IPOPT, nlp=True)
+assert problem.status == cp.OPTIMAL
+assert np.allclose(mu.value, np.mean(data))
+assert np.allclose(sigma.value, np.std(data))
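The asserts at the end of mle-canon.py encode the textbook result that for i.i.d. Gaussian data the likelihood is maximized at μ̂ = x̄ and σ̂² = (1/n)Σ(xᵢ − x̄)², i.e. the population variance, which is what np.std computes by default (ddof=0). A NumPy-only restatement of what the solver is expected to recover:

import numpy as np

np.random.seed(1234)
data = np.random.randn(2)

mu_hat = data.mean()                                 # closed-form MLE for mu
sigma_hat = np.sqrt(((data - mu_hat) ** 2).mean())   # population std
assert np.isclose(sigma_hat, np.std(data))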
+ """ + # Decision variable + x = cp.Variable(n) + + # Rayleigh quotient: (x^T K x) / (x^T M x) + numerator = cp.quad_form(x, K) + denominator = cp.quad_form(x, M) + rayleigh_quotient = numerator / denominator + + # Objective + if find_min: + objective = cp.Minimize(rayleigh_quotient) + else: + objective = cp.Maximize(rayleigh_quotient) + + # Constraint: unit norm to avoid trivial solution + constraints = [ + cp.norm(x, 2) == 1 + ] + + # Create problem + problem = cp.Problem(objective, constraints) + + print(f"\n{'='*50}") + print(f"Solving for {'minimum' if find_min else 'maximum'} Rayleigh quotient") + + # Solve with a non-convex solver + # SCS can sometimes handle non-convex problems + try: + # Set initial guess + x_init = np.random.randn(n) + x_init = x_init / np.linalg.norm(x_init) + x.value = x_init + + # Solve + problem.solve(solver=cp.IPOPT, nlp=True, verbose=True) + + if x.value is not None: + # Compute the Rayleigh quotient value + rq_value = (x.value.T @ K @ x.value) / (x.value.T @ M @ x.value) + return x.value, rq_value + else: + print("Solver failed to find a solution") + return None, None + + except Exception as e: + print(f"Error during solving: {e}") + return None, None +""" +# Alternative formulation: minimize quadratic form with quadratic constraint +def rayleigh_quotient_alternative(K, M, find_min=True): + # Decision variable + x = cp.Variable(n) + + # Objective: quadratic form with K + if find_min: + objective = cp.Minimize(cp.quad_form(x, K)) + else: + objective = cp.Maximize(cp.quad_form(x, K)) + + # Constraint: quadratic form with M equals 1 + constraints = [ + cp.quad_form(x, M) == 1 + ] + + # Create problem + problem = cp.Problem(objective, constraints) + + print(f"\n{'='*50}") + print(f"Alternative formulation - {'minimum' if find_min else 'maximum'}") + + try: + # Set initial guess + x_init = np.random.randn(n) + x_init = x_init / np.sqrt(x_init.T @ M @ x_init) + x.value = x_init + + # Solve + problem.solve(solver=cp.IPOPT, nlp=True, verbose=True) + + if x.value is not None: + # The objective value is the Rayleigh quotient since x^T M x = 1 + rq_value = x.value.T @ K @ x.value + return x.value, rq_value + else: + print("Solver failed to find a solution") + return None, None + + except Exception as e: + print(f"Error during solving: {e}") + return None, None +""" +# Ground truth using scipy +print("="*50) +print("Ground Truth (using scipy.linalg.eigh):") +eigenvalues, eigenvectors = eigh(K, M) +min_eigenvalue = eigenvalues[0] +min_eigenvector = eigenvectors[:, 0] +max_eigenvalue = eigenvalues[-1] +max_eigenvector = eigenvectors[:, -1] + +print(f"Minimum eigenvalue: {min_eigenvalue:.6f}") +print(f"Maximum eigenvalue: {max_eigenvalue:.6f}") +print(f"Eigenvalue range: [{min_eigenvalue:.6f}, {max_eigenvalue:.6f}]") + +# Solve using direct formulation +print("\n" + "="*50) +print("DIRECT NON-CONVEX FORMULATION") + +# Find minimum +x_min, rq_min = rayleigh_quotient_direct(K, M, find_min=True) +if x_min is not None: + print(f"\nMinimum Rayleigh quotient found: {rq_min:.6f}") + print(f"Ground truth minimum: {min_eigenvalue:.6f}") + print(f"Error: {abs(rq_min - min_eigenvalue):.6e}") + +# Find maximum +x_max, rq_max = rayleigh_quotient_direct(K, M, find_min=False) +if x_max is not None: + print(f"\nMaximum Rayleigh quotient found: {rq_max:.6f}") + print(f"Ground truth maximum: {max_eigenvalue:.6f}") + print(f"Error: {abs(rq_max - max_eigenvalue):.6e}") + +# Solve using alternative formulation +print("\n" + "="*50) +print("ALTERNATIVE NON-CONVEX FORMULATION") + +""" +# Find 
diff --git a/cvxpy/sandbox/rayleigh_quotient_alternative.py b/cvxpy/sandbox/rayleigh_quotient_alternative.py
new file mode 100644
index 0000000000..5092c2f15d
--- /dev/null
+++ b/cvxpy/sandbox/rayleigh_quotient_alternative.py
@@ -0,0 +1,63 @@
+import numpy as np
+from scipy.linalg import eigh
+
+import cvxpy as cp
+
+# Generate synthetic data for a structural vibration problem
+np.random.seed(42)
+n = 10  # dimension
+K_random = np.random.randn(n, n)
+K = K_random @ K_random.T + 5 * np.eye(n)
+M_random = np.random.randn(n, n) * 0.3
+M = M_random @ M_random.T + np.eye(n)
+
+print("Stiffness matrix (K) - first 5x5 block:")
+print(K[:5, :5].round(2))
+print("\nMass matrix (M) - first 5x5 block:")
+print(M[:5, :5].round(2))
+
+# Alternative formulation: optimize the quadratic form x^T K x subject to the
+# quadratic normalization constraint x^T M x = 1
+def rayleigh_quotient_alternative(K, M, find_min=True):
+    x = cp.Variable(n)
+    objective = cp.Minimize(cp.quad_form(x, K)) if find_min else cp.Maximize(cp.quad_form(x, K))
+    constraints = [cp.quad_form(x, M) == 1]
+    problem = cp.Problem(objective, constraints)
+    print(f"\n{'='*50}")
+    print(f"Alternative formulation - {'minimum' if find_min else 'maximum'}")
+    x_init = np.random.randn(n)
+    x_init = x_init / np.sqrt(x_init.T @ M @ x_init)
+    x.value = x_init
+    problem.solve(solver=cp.IPOPT, nlp=True, verbose=True)
+    if x.value is not None:
+        rq_value = x.value.T @ K @ x.value
+        return x.value, rq_value
+    else:
+        print("Solver failed to find a solution")
+        return None, None
+
+# Ground truth using scipy
+print("="*50)
+print("Ground Truth (using scipy.linalg.eigh):")
+eigenvalues, eigenvectors = eigh(K, M)
+min_eigenvalue = eigenvalues[0]
+min_eigenvector = eigenvectors[:, 0]
+max_eigenvalue = eigenvalues[-1]
+max_eigenvector = eigenvectors[:, -1]
+print(f"Minimum eigenvalue: {min_eigenvalue:.6f}")
+print(f"Maximum eigenvalue: {max_eigenvalue:.6f}")
+print(f"Eigenvalue range: [{min_eigenvalue:.6f}, {max_eigenvalue:.6f}]")
+
+# Solve using alternative formulation
+print("\n" + "="*50)
+print("ALTERNATIVE NON-CONVEX FORMULATION")
+
+x_min_alt, rq_min_alt = rayleigh_quotient_alternative(K, M, find_min=True)
+if x_min_alt is not None:
+    print(f"\nMinimum Rayleigh quotient found: {rq_min_alt:.6f}")
+    print(f"Ground truth minimum: {min_eigenvalue:.6f}")
+    print(f"Error: {abs(rq_min_alt - min_eigenvalue):.6e}")
+
+x_max_alt, rq_max_alt = rayleigh_quotient_alternative(K, M, find_min=False)
+if x_max_alt is not None:
+    print(f"\nMaximum Rayleigh quotient found: {rq_max_alt:.6f}")
+    print(f"Ground truth maximum: {max_eigenvalue:.6f}")
+    print(f"Error: {abs(rq_max_alt - max_eigenvalue):.6e}")
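Why this constrained form recovers the Rayleigh quotient: on the surface xᵀMx = 1 the objective xᵀKx equals the quotient itself, and scipy's eigh(K, M) returns eigenvectors normalized so that vᵀMv = 1, so the optimal objective value should match the eigenvalue directly. A check of that normalization convention (same K, M construction as above):

import numpy as np
from scipy.linalg import eigh

np.random.seed(42)
n = 10
K_random = np.random.randn(n, n)
K = K_random @ K_random.T + 5 * np.eye(n)
M_random = np.random.randn(n, n) * 0.3
M = M_random @ M_random.T + np.eye(n)

w, V = eigh(K, M)
v = V[:, 0]
assert np.isclose(v @ M @ v, 1.0)   # eigh normalizes v so v^T M v = 1
assert np.isclose(v @ K @ v, w[0])  # objective equals the eigenvalue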
diff --git a/cvxpy/sandbox/rayleigh_quotient_direct.py b/cvxpy/sandbox/rayleigh_quotient_direct.py
new file mode 100644
index 0000000000..595c6aff88
--- /dev/null
+++ b/cvxpy/sandbox/rayleigh_quotient_direct.py
@@ -0,0 +1,66 @@
+import numpy as np
+from scipy.linalg import eigh
+
+import cvxpy as cp
+
+# Generate synthetic data for a structural vibration problem
+np.random.seed(42)
+n = 10  # dimension
+K_random = np.random.randn(n, n)
+K = K_random @ K_random.T + 5 * np.eye(n)
+M_random = np.random.randn(n, n) * 0.3
+M = M_random @ M_random.T + np.eye(n)
+
+print("Stiffness matrix (K) - first 5x5 block:")
+print(K[:5, :5].round(2))
+print("\nMass matrix (M) - first 5x5 block:")
+print(M[:5, :5].round(2))
+
+# Direct non-convex Rayleigh quotient formulation
+def rayleigh_quotient_direct(K, M, find_min=True):
+    x = cp.Variable(n)
+    numerator = cp.quad_form(x, K)
+    denominator = cp.quad_form(x, M)
+    rayleigh_quotient = numerator / denominator
+    objective = cp.Minimize(rayleigh_quotient) if find_min else cp.Maximize(rayleigh_quotient)
+    constraints = [cp.norm(x, 2) == 1]
+    problem = cp.Problem(objective, constraints)
+    print(f"\n{'='*50}")
+    print(f"Solving for {'minimum' if find_min else 'maximum'} Rayleigh quotient")
+    x_init = np.random.randn(n)
+    x_init = x_init / np.linalg.norm(x_init)
+    x.value = x_init
+    problem.solve(solver=cp.IPOPT, nlp=True, verbose=True)
+    if x.value is not None:
+        rq_value = (x.value.T @ K @ x.value) / (x.value.T @ M @ x.value)
+        return x.value, rq_value
+    else:
+        print("Solver failed to find a solution")
+        return None, None
+
+# Ground truth using scipy
+print("="*50)
+print("Ground Truth (using scipy.linalg.eigh):")
+eigenvalues, eigenvectors = eigh(K, M)
+min_eigenvalue = eigenvalues[0]
+min_eigenvector = eigenvectors[:, 0]
+max_eigenvalue = eigenvalues[-1]
+max_eigenvector = eigenvectors[:, -1]
+print(f"Minimum eigenvalue: {min_eigenvalue:.6f}")
+print(f"Maximum eigenvalue: {max_eigenvalue:.6f}")
+print(f"Eigenvalue range: [{min_eigenvalue:.6f}, {max_eigenvalue:.6f}]")
+
+# Solve using direct formulation
+print("\n" + "="*50)
+print("DIRECT NON-CONVEX FORMULATION")
+
+x_min, rq_min = rayleigh_quotient_direct(K, M, find_min=True)
+if x_min is not None:
+    print(f"\nMinimum Rayleigh quotient found: {rq_min:.6f}")
+    print(f"Ground truth minimum: {min_eigenvalue:.6f}")
+    print(f"Error: {abs(rq_min - min_eigenvalue):.6e}")
+
+x_max, rq_max = rayleigh_quotient_direct(K, M, find_min=False)
+if x_max is not None:
+    print(f"\nMaximum Rayleigh quotient found: {rq_max:.6f}")
+    print(f"Ground truth maximum: {max_eigenvalue:.6f}")
+    print(f"Error: {abs(rq_max - max_eigenvalue):.6e}")
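The unit-norm constraint in the direct formulation only pins down scale: the Rayleigh quotient satisfies R(cx) = R(x) for any c ≠ 0, so without the constraint every nonzero multiple of a solution is equally optimal and the quotient is undefined at x = 0. A one-line check of the scale invariance (toy matrices, not the script's K and M):

import numpy as np

np.random.seed(0)
n = 5
K = np.eye(n) + np.diag(np.arange(n, dtype=float))  # symmetric PD
M = np.eye(n)
x = np.random.randn(n)
R = lambda z: (z @ K @ z) / (z @ M @ z)
assert np.isclose(R(x), R(3.7 * x))  # Rayleigh quotient is scale-invariant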
diff --git a/cvxpy/sandbox/train_logistic_regression.py b/cvxpy/sandbox/train_logistic_regression.py
new file mode 100644
index 0000000000..7ac17744f3
--- /dev/null
+++ b/cvxpy/sandbox/train_logistic_regression.py
@@ -0,0 +1,210 @@
+import matplotlib.pyplot as plt
+import numpy as np
+from sklearn.datasets import make_classification
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+
+import cvxpy as cp
+
+
+def sigmoid(x):
+    """Sigmoid activation function"""
+    return 1 / (1 + cp.exp(-x))
+
+def relu(x):
+    """ReLU activation function"""
+    return cp.maximum(0, x)
+
+def train_two_layer_nn(X, y, hidden_dim=10, lambda_reg=0.01, activation='sigmoid'):
+    """
+    Train a two-layer neural network using CVXPY with logistic regression loss.
+    Assumes CVXPY can handle non-convex problems via NLP solvers.
+
+    Parameters:
+    -----------
+    X : numpy array of shape (n_samples, n_features)
+        Training data
+    y : numpy array of shape (n_samples,)
+        Binary labels (0 or 1)
+    hidden_dim : int
+        Number of hidden units
+    lambda_reg : float
+        L2 regularization parameter
+    activation : str
+        Activation function ('relu' or 'sigmoid')
+
+    Returns:
+    --------
+    W1, b1, W2, b2 : Trained network parameters
+    problem : CVXPY problem object
+    """
+    n_samples, n_features = X.shape
+
+    # Define optimization variables for network parameters
+    # W1: list of n_features-dimensional vectors, one per hidden unit
+    W1_vecs = [cp.Variable(n_features) for _ in range(hidden_dim)]  # First layer weights (list of vectors)
+    W1 = cp.vstack(W1_vecs).T  # Shape: (n_features, hidden_dim)
+    b1 = cp.Variable(hidden_dim)  # First layer bias (1-d)
+    W2 = cp.Variable(hidden_dim)  # Second layer weights (1-d)
+    b2 = cp.Variable(1)  # Second layer bias (1-d)
+
+    # Forward pass through the network
+    # First layer: linear transformation
+    # Z1: shape (n_samples, hidden_dim)
+    Z1 = X @ W1 + b1  # W1 is (n_features, hidden_dim)
+
+    # Apply activation function
+    if activation == 'relu':
+        H1 = relu(Z1)
+    elif activation == 'sigmoid':
+        H1 = sigmoid(Z1)
+    else:
+        H1 = Z1  # Linear activation
+
+    # Second layer: linear transformation
+    Z2 = H1 @ W2 + b2  # H1 shape (n_samples, hidden_dim), W2 shape (hidden_dim,)
+
+    # Output probabilities using sigmoid
+    y_pred = sigmoid(Z2)
+
+    # Logistic regression loss (binary cross-entropy)
+    # Loss = -1/n * sum(y*log(y_pred) + (1-y)*log(1-y_pred))
+    # y and y_pred are both 1-d of length n_samples
+    logistic_loss = -cp.sum(
+        cp.multiply(y, cp.log(y_pred + 1e-8)) +
+        cp.multiply(1 - y, cp.log(1 - y_pred + 1e-8))
+    ) / n_samples
+
+    # L2 regularization
+    l2_penalty = lambda_reg * (
+        sum(cp.sum_squares(w) for w in W1_vecs) +
+        cp.sum_squares(W2) +
+        cp.sum_squares(b1) +
+        cp.sum_squares(b2)
+    )
+
+    # Total objective function
+    objective = cp.Minimize(logistic_loss + l2_penalty)
+
+    # Define the optimization problem
+    problem = cp.Problem(objective)
+
+    # Solve using a nonlinear programming solver
+    # Assuming CVXPY can interface with NLP solvers like IPOPT, SNOPT, etc.
+    problem.solve(solver='IPOPT', nlp=True, verbose=True)
+
+    return W1.value, b1.value, W2.value, b2.value, problem
+def predict(X, W1, b1, W2, b2, activation='sigmoid'):
+    """
+    Make predictions using the trained neural network.
+
+    Parameters:
+    -----------
+    X : numpy array
+        Input data
+    W1, b1, W2, b2 : numpy arrays
+        Network parameters
+    activation : str
+        Activation function
+
+    Returns:
+    --------
+    predictions : numpy array
+        Binary predictions
+    probabilities : numpy array
+        Predicted probabilities
+    """
+    # First layer
+    # W1: (n_features, hidden_dim) weight matrix
+    Z1 = X @ W1 + b1
+
+    # Activation
+    if activation == 'relu':
+        H1 = np.maximum(0, Z1)
+    elif activation == 'sigmoid':
+        H1 = 1 / (1 + np.exp(-Z1))
+    else:
+        H1 = Z1
+
+    # Second layer
+    Z2 = H1 @ W2 + b2  # H1 shape (n_samples, hidden_dim), W2 shape (hidden_dim,)
+
+    # Output probabilities
+    probabilities = 1 / (1 + np.exp(-Z2))
+
+    # Binary predictions
+    predictions = (probabilities > 0.5).astype(int)
+
+    return predictions.flatten(), probabilities.flatten()
+
+# Example usage
+if __name__ == "__main__":
+    # Generate synthetic binary classification data
+    X, y = make_classification(
+        n_samples=1000,
+        n_features=20,
+        n_informative=15,
+        n_redundant=5,
+        n_clusters_per_class=2,
+        random_state=42
+    )
+
+    # Split into train and test sets
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=0.2, random_state=42
+    )
+
+    # Standardize features
+    scaler = StandardScaler()
+    X_train = scaler.fit_transform(X_train)
+    X_test = scaler.transform(X_test)
+
+    # Train the neural network
+    print("Training two-layer neural network with CVXPY...")
+    print("=" * 50)
+
+    W1, b1, W2, b2, problem = train_two_layer_nn(
+        X_train, y_train,
+        hidden_dim=20,
+        lambda_reg=0.001,
+        activation='relu'
+    )
+
+    print(f"\nOptimization status: {problem.status}")
+    print(f"Final loss value: {problem.value:.4f}")
+
+    # Make predictions on test set
+    y_pred, y_prob = predict(X_test, W1, b1, W2, b2, activation='relu')
+
+    # Calculate accuracy
+    accuracy = np.mean(y_pred == y_test)
+    print(f"\nTest accuracy: {accuracy:.4f}")
+
+    # Print network architecture summary
+    print("\nNetwork Architecture:")
+    print(f"Input layer: {X_train.shape[1]} features")
+    print(f"Hidden layer: {W1.shape[1]} units (ReLU activation)")
+    print("Output layer: 1 unit (Sigmoid activation)")
+    print(f"\nTotal parameters: {W1.size + b1.size + W2.size + b2.size}")
+
+    # Additional training with a different activation
+    print("\n" + "=" * 50)
+    print("Training with sigmoid activation...")
+
+    W1_sig, b1_sig, W2_sig, b2_sig, problem_sig = train_two_layer_nn(
+        X_train, y_train,
+        hidden_dim=20,
+        lambda_reg=0.001,
+        activation='sigmoid'
+    )
+
+    y_pred_sig, y_prob_sig = predict(X_test, W1_sig, b1_sig, W2_sig, b2_sig, activation='sigmoid')
+    accuracy_sig = np.mean(y_pred_sig == y_test)
+
+    print(f"Test accuracy (sigmoid): {accuracy_sig:.4f}")
+
+    # Compare the two models
+    print("\nModel Comparison:")
+    print(f"ReLU activation accuracy: {accuracy:.4f}")
+    print(f"Sigmoid activation accuracy: {accuracy_sig:.4f}")
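A NumPy-only sanity check of the shape flow the network above relies on, with W1 stored as an (n_features, hidden_dim) matrix as in the corrected train_two_layer_nn (sizes here are arbitrary, not taken from the script):

import numpy as np

n_samples, n_features, hidden_dim = 8, 20, 10
X = np.random.randn(n_samples, n_features)
W1 = np.random.randn(n_features, hidden_dim)
b1 = np.random.randn(hidden_dim)
W2 = np.random.randn(hidden_dim)
b2 = np.random.randn(1)

H1 = 1 / (1 + np.exp(-(X @ W1 + b1)))       # (n_samples, hidden_dim)
probs = 1 / (1 + np.exp(-(H1 @ W2 + b2)))   # (n_samples,)
assert probs.shape == (n_samples,)
assert np.all((0 < probs) & (probs < 1))    # valid probabilities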