Linear Fitting with Matplotlib and Plotly

Linear regression is a fundamental data-analysis method for understanding the relationship between two variables. Here, I summarize four reusable Python functions for performing and visualizing linear fits:

  1. Matplotlib: with and without intercept
  2. Plotly: with and without intercept

All methods:

  • Drop NaN values in x and y
  • Plot a scatter graph
  • Fit a straight line
  • Annotate the equation and correlation coefficient (R)

1. Matplotlib — Linear Fit with Intercept

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import linregress

def add_mpl_subplot_with_intercept(ax, df, x_col, y_col, color='blue'):
    """Scatter two DataFrame columns on *ax* and overlay a linear fit.

    Rows where either column is NaN are dropped first.  The fit
    ``y = slope * x + intercept`` comes from ``scipy.stats.linregress`` and
    is drawn as a red dashed line over the observed x range.  The equation
    and the correlation coefficient R are written at the top-left of the
    axes (axes-fraction coordinates).

    Args:
        ax: Target axes; ``scatter``, ``plot`` and ``annotate`` are called
            on it.
        df: DataFrame holding the data.
        x_col: Name of the column used as x.
        y_col: Name of the column used as y.
        color: Colour of the scatter markers.

    Returns:
        None.  If fewer than two distinct x values remain after dropping
        NaNs, only the scatter is drawn.
    """
    x = df[x_col]
    y = df[y_col]
    keep = x.notna() & y.notna()
    x, y = x[keep], y[keep]

    ax.scatter(x, y, color=color, alpha=0.6)

    # A slope is undefined without at least two distinct x values, and
    # linregress cannot produce a fit for such input; keep the scatter and
    # skip the line and annotation instead of failing the whole figure.
    if x.nunique() < 2:
        return

    # Linear regression with intercept
    slope, intercept, r_value, _, _ = linregress(x, y)
    line_x = np.linspace(x.min(), x.max(), 100)
    ax.plot(line_x, slope * line_x + intercept, 'r--')

    # Render the sign separately so a negative intercept reads
    # "y = 2.00x - 1.00" rather than "y = 2.00x + -1.00".
    sign = '-' if intercept < 0 else '+'
    eq_text = (
        f"y = {slope:.2f}x {sign} {abs(intercept):.2f}\n"
        f"R = {r_value:.2f}"
    )
    ax.annotate(
        eq_text,
        xy=(0.05, 0.95), xycoords='axes fraction',
        ha='left', va='top', fontsize=10, color='black',
    )

2. Matplotlib — Linear Fit without Intercept

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
def add_mpl_subplot_no_intercept(ax, df, x_col, y_col, color='green'):
    """Scatter two DataFrame columns on *ax* and overlay a fit through the origin.

    Rows where either column is NaN are dropped first.  The slope of
    ``y = slope * x`` is the least-squares value ``sum(x*y) / sum(x**2)``;
    R is the correlation between the two columns as returned by
    ``x.corr(y)``.  The line is drawn red and dashed over the observed x
    range, and the equation and R are written at the top-left of the axes.

    Args:
        ax: Target axes; ``scatter``, ``plot`` and ``annotate`` are called
            on it.
        df: DataFrame holding the data.
        x_col: Name of the column used as x.
        y_col: Name of the column used as y.
        color: Colour of the scatter markers.

    Returns:
        None.  If ``sum(x**2)`` is zero (no valid rows, or every x is 0),
        only the scatter is drawn.
    """
    x = df[x_col]
    y = df[y_col]
    keep = x.notna() & y.notna()
    x, y = x[keep], y[keep]

    ax.scatter(x, y, color=color, alpha=0.6)

    # With no data or all-zero x the slope would be 0/0; skip the fit
    # rather than drawing a NaN line and a "y = nanx" annotation.
    sum_xx = np.sum(x ** 2)
    if sum_xx == 0:
        return

    # Linear regression without intercept
    slope = np.sum(x * y) / sum_xx
    r_value = x.corr(y)
    line_x = np.linspace(x.min(), x.max(), 100)
    ax.plot(line_x, slope * line_x, 'r--')

    eq_text = f"y = {slope:.2f}x\nR = {r_value:.2f}"
    ax.annotate(
        eq_text,
        xy=(0.05, 0.95), xycoords='axes fraction',
        ha='left', va='top', fontsize=10, color='black',
    )

3. Plotly — Linear Fit with Intercept

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import plotly.graph_objects as go
from scipy.stats import linregress

def add_plotly_subplot_with_intercept(fig, df, x_col, y_col, row, col, color='blue', show_eq=True):
    """Add a scatter plus linear fit to one subplot of a Plotly figure.

    Rows where either column is NaN are dropped first.  The fit
    ``y = slope * x + intercept`` comes from ``scipy.stats.linregress`` and
    is added as a red dashed line over the observed x range.  When
    *show_eq* is true, the equation and the correlation coefficient R are
    annotated near the top-left of the data range.

    Args:
        fig: Figure to draw on; traces and the annotation are added with
            ``row``/``col``, so it must have a subplot grid.
        df: DataFrame holding the data.
        x_col: Name of the column used as x.
        y_col: Name of the column used as y.
        row: Subplot row passed to ``add_trace`` / ``add_annotation``.
        col: Subplot column passed to ``add_trace`` / ``add_annotation``.
        color: Colour of the scatter markers.
        show_eq: Whether to add the equation/R annotation.

    Returns:
        None.  If fewer than two distinct x values remain after dropping
        NaNs, only the scatter trace is added.
    """
    x = df[x_col]
    y = df[y_col]
    keep = x.notna() & y.notna()
    x, y = x[keep], y[keep]

    fig.add_trace(
        go.Scatter(
            x=x, y=y, mode='markers',
            marker=dict(color=color),
            showlegend=False,
        ),
        row=row, col=col,
    )

    # A slope is undefined without at least two distinct x values, and
    # linregress cannot produce a fit for such input; keep the scatter and
    # skip the line and annotation.
    if x.nunique() < 2:
        return

    slope, intercept, r_value, _, _ = linregress(x, y)
    line_x = np.linspace(x.min(), x.max(), 100)
    line_y = slope * line_x + intercept

    fig.add_trace(
        go.Scatter(
            x=line_x, y=line_y, mode='lines',
            line=dict(color='red', dash='dash'),
            showlegend=False,
        ),
        row=row, col=col,
    )

    if not show_eq:
        return

    # Render the sign separately so a negative intercept reads
    # "y = 2.00x - 1.00" rather than "y = 2.00x + -1.00".
    sign = '-' if intercept < 0 else '+'
    eq_text = (
        f"y = {slope:.2f}x {sign} {abs(intercept):.2f}"
        f"<br>R = {r_value:.2f}"
    )

    x_lo, x_hi = x.min(), x.max()
    y_lo, y_hi = y.min(), y.max()
    # No explicit xref/yref: subplot axes are numbered by position in the
    # grid, not by column/row index, so f'x{col}' / f'y{row}' points at the
    # wrong axes in multi-column layouts.  row/col lets Plotly resolve them.
    fig.add_annotation(
        text=eq_text,
        x=x_lo + 0.05 * (x_hi - x_lo),
        y=y_hi - 0.05 * (y_hi - y_lo),
        # Anchor the text box by its top-left corner; the default 'auto'
        # centres data-referenced annotations on (x, y).
        xanchor="left",
        yanchor="top",
        showarrow=False,
        font=dict(size=11, color="black"),
        align="left",
        row=row, col=col,
    )

4. Plotly — Linear Fit without Intercept

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
def add_plotly_subplot_no_intercept(fig, df, x_col, y_col, row, col, color='green', show_eq=True):
    """Add a scatter plus through-origin linear fit to one Plotly subplot.

    Rows where either column is NaN are dropped first.  The slope of
    ``y = slope * x`` is the least-squares value ``sum(x*y) / sum(x**2)``;
    R is the correlation between the two columns as returned by
    ``x.corr(y)``.  The line is added red and dashed over the observed x
    range.  When *show_eq* is true, the equation and R are annotated near
    the top-left of the data range.

    Args:
        fig: Figure to draw on; traces and the annotation are added with
            ``row``/``col``, so it must have a subplot grid.
        df: DataFrame holding the data.
        x_col: Name of the column used as x.
        y_col: Name of the column used as y.
        row: Subplot row passed to ``add_trace`` / ``add_annotation``.
        col: Subplot column passed to ``add_trace`` / ``add_annotation``.
        color: Colour of the scatter markers.
        show_eq: Whether to add the equation/R annotation.

    Returns:
        None.  If ``sum(x**2)`` is zero (no valid rows, or every x is 0),
        only the scatter trace is added.
    """
    x = df[x_col]
    y = df[y_col]
    keep = x.notna() & y.notna()
    x, y = x[keep], y[keep]

    fig.add_trace(
        go.Scatter(
            x=x, y=y, mode='markers',
            marker=dict(color=color),
            showlegend=False,
        ),
        row=row, col=col,
    )

    # With no data or all-zero x the slope would be 0/0; skip the fit
    # rather than adding a NaN line and a "y = nanx" annotation.
    sum_xx = np.sum(x ** 2)
    if sum_xx == 0:
        return

    slope = np.sum(x * y) / sum_xx
    r_value = x.corr(y)
    line_x = np.linspace(x.min(), x.max(), 100)
    line_y = slope * line_x

    fig.add_trace(
        go.Scatter(
            x=line_x, y=line_y, mode='lines',
            line=dict(color='red', dash='dash'),
            showlegend=False,
        ),
        row=row, col=col,
    )

    if not show_eq:
        return

    eq_text = f"y = {slope:.2f}x<br>R = {r_value:.2f}"

    x_lo, x_hi = x.min(), x.max()
    y_lo, y_hi = y.min(), y.max()
    # No explicit xref/yref: subplot axes are numbered by position in the
    # grid, not by column/row index, so f'x{col}' / f'y{row}' points at the
    # wrong axes in multi-column layouts.  row/col lets Plotly resolve them.
    fig.add_annotation(
        text=eq_text,
        x=x_lo + 0.05 * (x_hi - x_lo),
        y=y_hi - 0.05 * (y_hi - y_lo),
        # Anchor the text box by its top-left corner; the default 'auto'
        # centres data-referenced annotations on (x, y).
        xanchor="left",
        yanchor="top",
        showarrow=False,
        font=dict(size=11, color="black"),
        align="left",
        row=row, col=col,
    )
🍵非靶向质谱方法探索茶叶的分子组成 Create your first web app: Interactive data panel for visualizing correlations

Comments

Your browser is out-of-date!

Update your browser to view this website correctly. Update my browser now

×