Python常用算法解析,从优雅简洁到高效实战
目录
一、Python算法哲学:优雅与效率的平衡
1.1 Python算法的核心优势
1.2 Python的内置算法工具
二、列表与字典算法
2.1 列表推导式的艺术
2.2 字典的高级用法
三、排序与搜索算法
3.1 内置排序的灵活运用
3.2 二分查找与bisect模块
四、图算法与网络分析
4.1 使用networkx进行图分析
4.2 自定义图算法实现
五、数值计算与科学计算
5.1 使用NumPy进行数值计算
5.2 使用pandas进行数据分析
六、机器学习算法
6.1 使用 scikit-learn
6.2 自定义机器学习算法

如果您喜欢此文章,请收藏、点赞、评论,谢谢,祝您快乐每一天。
一、Python算法哲学:优雅与效率的平衡
1.1 Python算法的核心优势
Python算法设计融合了多种编程范式:
函数式编程:map、filter、reduce、列表推导式
面向对象:自定义数据结构、运算符重载
元编程:装饰器、元类、描述符
动态特性:鸭子类型、运行时类型检查
1.2 Python的内置算法工具
# Algorithmic helpers built into Python itself.
numbers = [3, 1, 4, 1, 5, 9, 2, 6]

# Sorting: sorted() returns a new list, list.sort() works in place.
sorted_nums = sorted(numbers)
numbers.sort()
reversed_nums = numbers[::-1]  # reversed copy of the (now sorted) list

# Extremes and aggregation.
max_val = max(numbers)
min_val = min(numbers)
sum_val = sum(numbers)

# key= lets sorted() rank items by a derived value.
words = ["apple", "banana", "cherry", "date"]
sorted_words = sorted(words, key=len)        # shortest first
sorted_words = sorted(words, key=str.lower)  # case-insensitive

# Lambdas make ad-hoc sort keys easy.
students = [("Alice", 85), ("Bob", 92), ("Charlie", 78)]
sorted_students = sorted(students, key=lambda pair: pair[1], reverse=True)

二、列表与字典算法
2.1 列表推导式的艺术
# Comprehensions: declarative construction of lists, dicts, and sets.

# Plain list comprehension.
squares = [n * n for n in range(10)]  # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

# Filtered comprehension: squares of the even numbers only.
even_squares = [n * n for n in range(10) if not n % 2]

# Nested comprehension flattens a matrix row by row.
matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
flattened = [value for row in matrix for value in row]

# Dict comprehension.
squares_dict = {n: n * n for n in range(5)}  # {0: 0, 1: 1, 2: 4, 3: 9, 4: 16}

# Set comprehension: the distinct non-space characters.
unique_chars = {c for c in "hello world" if c != ' '}

# Generator expressions are lazy: values are produced on demand.
large_squares = (n * n for n in range(1000000))  # nothing computed yet
sum_of_squares = sum(n * n for n in range(1000))  # streams, memory-light
2.2 字典的高级用法
# defaultdict / Counter demos from the collections module.
from collections import defaultdict, Counter

# Sample inputs — the original snippet referenced undefined names
# (`text`, `words`) and raised NameError when run standalone.
text = "to be or not to be"
words = ["apple", "banana", "cherry", "date"]

# 1. defaultdict(int): missing keys start at 0 -> word-frequency count.
word_counts = defaultdict(int)
for word in text.split():
    word_counts[word] += 1

# Group words by first letter; missing keys start as empty lists.
words_by_letter = defaultdict(list)
for word in words:
    words_by_letter[word[0]].append(word)

# 2. Counter: a dict purpose-built for counting.
colors = ['red', 'blue', 'red', 'green', 'blue', 'blue']
color_counts = Counter(colors)  # Counter({'blue': 3, 'red': 2, 'green': 1})
# The n most frequent elements, most frequent first.
most_common = color_counts.most_common(2)  # [('blue', 3), ('red', 2)]
# 3. OrderedDict:保持插入顺序(Python 3.7+普通dict已有序)
from collections import OrderedDict
# LRU缓存实现
class LRUCache:
    """Least-recently-used cache backed by an OrderedDict.

    The dict's ordering tracks recency: the front holds the
    least-recently-used entry, the back the most-recently-used one.
    """

    def __init__(self, capacity: int):
        self.cache = OrderedDict()
        self.capacity = capacity

    def get(self, key):
        """Return the cached value, or -1 on a miss; refreshes recency."""
        try:
            value = self.cache[key]
        except KeyError:
            return -1
        self.cache.move_to_end(key)  # mark as most recently used
        return value

    def put(self, key, value):
        """Insert or refresh *key*, evicting the stalest entry when full."""
        if key in self.cache:
            self.cache.move_to_end(key)
        self.cache[key] = value
        if len(self.cache) > self.capacity:
            self.cache.popitem(last=False)  # drop the least-recently-used

三、排序与搜索算法
3.1 内置排序的灵活运用
# Sorting rich objects with key functions, operator helpers, and stability.
class Student:
    """Simple record used to demonstrate multi-field sorting."""

    def __init__(self, name, grade, age):
        self.name = name
        self.grade = grade
        self.age = age

    def __repr__(self):
        return f"Student({self.name}, {self.grade}, {self.age})"


students = [
    Student('Alice', 'A', 20),
    Student('Bob', 'B', 19),
    Student('Charlie', 'A', 21),
    Student('David', 'C', 20),
]

# A tuple key gives lexicographic multi-field ordering: grade, then age.
sorted_students = sorted(students, key=lambda s: (s.grade, s.age))

# operator.attrgetter / itemgetter are the canonical key helpers.
from operator import attrgetter, itemgetter

# attrgetter reads object attributes.
sorted_by_age = sorted(students, key=attrgetter('age'))

# itemgetter indexes tuples or dicts.
data = [
    {'name': 'Alice', 'score': 85},
    {'name': 'Bob', 'score': 92},
    {'name': 'Charlie', 'score': 78},
]
sorted_data = sorted(data, key=itemgetter('score'), reverse=True)

# sorted() is stable: items with equal keys keep their original order.
items = [('red', 1), ('blue', 1), ('red', 2), ('blue', 2)]
sorted_items = sorted(items, key=lambda pair: pair[0])
# -> [('blue', 1), ('blue', 2), ('red', 1), ('red', 2)]
3.2 二分查找与bisect模块
import bisect

# bisect locates positions in an already-sorted list in O(log n).
sorted_list = [1, 3, 5, 7, 9, 11, 13]

# bisect_left: first index where 7 could go; bisect_right: one past the last.
position = bisect.bisect_left(sorted_list, 7)   # 3
position = bisect.bisect_right(sorted_list, 7)  # 4 — matters with duplicates

# insort inserts while keeping the list sorted.
bisect.insort_left(sorted_list, 6)
bisect.insort_right(sorted_list, 8)
# Binary search driven by a caller-supplied key function.
def binary_search_custom(arr, target, key_func):
    """Return the index of the first element whose key equals *target*, else -1.

    *arr* must be sorted in ascending order of *key_func*.
    """
    low, high = 0, len(arr)
    # Classic lower-bound loop: invariant key(arr[i]) < target for all i < low.
    while low < high:
        middle = (low + high) // 2
        if key_func(arr[middle]) < target:
            low = middle + 1
        else:
            high = middle
    if low < len(arr) and key_func(arr[low]) == target:
        return low
    return -1
# Locate the span occupied by a value in a sorted sequence.
def find_range(arr, target):
    """Return (start, stop) bounds of *target* in sorted *arr*, or (-1, -1).

    *start* is the first index holding target; *stop* is one past the last.
    """
    lo = bisect.bisect_left(arr, target)
    hi = bisect.bisect_right(arr, target)
    if lo < hi:
        return (lo, hi)
    return (-1, -1)

# Example
nums = [5, 7, 7, 8, 8, 8, 10]
start, end = find_range(nums, 8)  # (3, 6)

四、图算法与网络分析
4.1 使用networkx进行图分析
import networkx as nx
import matplotlib.pyplot as plt

# Build a small undirected graph.
G = nx.Graph()  # use nx.DiGraph() for a directed graph
G.add_nodes_from([1, 2, 3, 4, 5])
G.add_edges_from([(1, 2), (1, 3), (2, 3), (3, 4), (4, 5)])

# Classic graph queries.
shortest_path = nx.shortest_path(G, source=1, target=5)  # [1, 3, 4, 5]
connected_components = list(nx.connected_components(G))
degree_centrality = nx.degree_centrality(G)

# PageRank with the usual 0.85 damping factor.
pagerank_scores = nx.pagerank(G, alpha=0.85)

# Community detection via greedy modularity maximisation
# (Clauset-Newman-Moore — not the Louvain method, despite the original note).
from networkx.algorithms import community
communities = community.greedy_modularity_communities(G)

# Draw the graph with a force-directed (spring) layout.
pos = nx.spring_layout(G)
nx.draw(G, pos, with_labels=True, node_color='lightblue',
        node_size=500, font_size=10)
plt.show()
4.2 自定义图算法实现
from collections import deque, defaultdict
# Breadth-first search over a graph given as {vertex: set_of_neighbors}.
def bfs(graph, start):
    """Return the vertices reachable from *start* in BFS order.

    NOTE(review): neighbor collections must support set difference,
    i.e. graph values are expected to be sets — confirm at call sites.
    """
    seen = set()
    pending = deque([start])
    order = []
    while pending:
        node = pending.popleft()
        if node in seen:
            continue
        seen.add(node)
        order.append(node)
        pending.extend(graph[node] - seen)  # enqueue unvisited neighbors
    return order
# Depth-first search, recursive formulation.
def dfs_recursive(graph, vertex, visited=None):
    """Return the set of vertices reachable from *vertex* (itself included).

    *visited* is threaded through the recursion; callers normally omit it.
    """
    visited = set() if visited is None else visited
    visited.add(vertex)
    for nxt in graph[vertex]:
        if nxt in visited:
            continue
        dfs_recursive(graph, nxt, visited)
    return visited
# Dijkstra's single-source shortest paths (non-negative edge weights).
import heapq

def dijkstra(graph, start):
    """Return {vertex: shortest distance from *start*} for every vertex.

    *graph* maps each vertex to a {neighbor: weight} dict; vertices
    unreachable from *start* keep a distance of infinity.
    """
    dist = {node: float('infinity') for node in graph}
    dist[start] = 0
    heap = [(0, start)]  # (distance-so-far, vertex) min-heap
    while heap:
        d, node = heapq.heappop(heap)
        if d > dist[node]:
            continue  # stale entry: a shorter path was already settled
        for nbr, weight in graph[node].items():
            candidate = d + weight
            if candidate < dist[nbr]:
                dist[nbr] = candidate
                heapq.heappush(heap, (candidate, nbr))
    return dist
# Kahn's algorithm for topologically ordering a DAG.
def topological_sort(graph):
    """Return a topological order of *graph* ({node: iterable-of-successors}).

    Unlike the original version, nodes that appear only as successors
    (and not as keys of *graph*) are handled instead of raising KeyError.
    Raises ValueError if the graph contains a cycle.
    """
    # Compute in-degrees; .get() discovers successor-only (sink) nodes.
    in_degree = {u: 0 for u in graph}
    for u in graph:
        for v in graph[u]:
            in_degree[v] = in_degree.get(v, 0) + 1
    # Seed the queue with every node that has no incoming edge.
    queue = deque(u for u in in_degree if in_degree[u] == 0)
    result = []
    while queue:
        u = queue.popleft()
        result.append(u)
        for v in graph.get(u, ()):  # sinks may lack an adjacency entry
            in_degree[v] -= 1
            if in_degree[v] == 0:
                queue.append(v)
    # Any node not emitted sits on (or behind) a cycle.
    if len(result) != len(in_degree):
        raise ValueError("图中存在环")
    return result

五、数值计算与科学计算
5.1 使用NumPy进行数值计算
import numpy as np

# Array construction.
arr = np.array([1, 2, 3, 4, 5])
matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

# Vectorisation: the NumPy expression runs in C, the list comp in Python.
squares = [n * n for n in range(1000000)]  # pure-Python baseline (slow)
squares_np = np.arange(1000000) ** 2       # vectorised (fast)

# Broadcasting stretches the 1-D row across every row of the 2-D array.
a = np.array([[1, 2, 3], [4, 5, 6]])
b = np.array([10, 20, 30])
result = a + b  # [[11, 22, 33], [14, 25, 36]]

# Linear algebra.
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])
dot_product = A @ B                # matrix product (same as np.dot here)
determinant = np.linalg.det(A)
eigenvalues, eigenvectors = np.linalg.eig(A)

# Column-wise statistics over random data (1000 rows, 10 columns).
data = np.random.randn(1000, 10)
mean = data.mean(axis=0)
std = data.std(axis=0)
correlation = np.corrcoef(data.T)  # 10x10 correlation matrix
5.2 使用pandas进行数据分析
import pandas as pd
import numpy as np

# Build a small DataFrame from a column dict.
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David'],
    'Age': [25, 30, 35, 40],
    'Salary': [50000, 60000, 70000, 80000],
    'Department': ['HR', 'Engineering', 'Engineering', 'HR'],
}
df = pd.DataFrame(data)

# Row selection: boolean masks and query strings.
engineering_df = df[df['Department'] == 'Engineering']
high_salary = df[df['Salary'] > 60000]
result = df.query('Age > 30 and Salary < 75000')

# Group-by aggregation with different statistics per column.
grouped = df.groupby('Department')
summary = grouped.agg({
    'Age': ['mean', 'min', 'max'],
    'Salary': ['sum', 'mean', 'std'],
})

# Pivot table over a single value column.
pivot = pd.pivot_table(df,
                       values='Salary',
                       index='Department',
                       columns=None,
                       aggfunc=['mean', 'count', 'sum'])

# Daily time series: monthly resampling and a 7-day rolling mean.
dates = pd.date_range('2023-01-01', periods=100, freq='D')
time_series = pd.Series(np.random.randn(100), index=dates)
monthly_avg = time_series.resample('M').mean()
rolling_avg = time_series.rolling(window=7).mean()

六、机器学习算法
6.1 使用 scikit-learn
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

# Load the iris dataset (150 samples, 4 features, 3 classes).
iris = load_iris()
X, y = iris.data, iris.target

# Standardise features to zero mean / unit variance.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Stratified split keeps class proportions in both halves.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)

# Fit a 100-tree random forest with a fixed seed.
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Evaluate on the held-out split.
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))

# 5-fold cross-validation over the full dataset.
cv_scores = cross_val_score(clf, X_scaled, y, cv=5)
print(f"交叉验证平均得分: {cv_scores.mean():.3f}")

# Per-feature importances learned by the forest.
feature_importances = clf.feature_importances_
for name, importance in zip(iris.feature_names, feature_importances):
    print(f"{name}: {importance:.3f}")
6.2 自定义机器学习算法
import numpy as np
from collections import Counter
class KNN:
    """k-nearest-neighbours classifier (brute force, Euclidean distance)."""

    def __init__(self, k=3):
        self.k = k  # number of neighbours consulted per prediction

    def fit(self, X, y):
        """Lazy learner: just memorise the training data."""
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        """Predict a label for every row of *X*."""
        return np.array([self._predict(sample) for sample in X])

    def _predict(self, sample):
        """Majority vote among the k training points closest to *sample*."""
        dists = [np.linalg.norm(sample - ref) for ref in self.X_train]
        nearest = np.argsort(dists)[:self.k]
        labels = [self.y_train[i] for i in nearest]
        winner, _count = Counter(labels).most_common(1)[0]
        return winner
class LinearRegression:
    """Ordinary least squares fitted by batch gradient descent."""

    def __init__(self, learning_rate=0.01, n_iterations=1000):
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.weights = None  # coefficient vector, set by fit()
        self.bias = None     # intercept, set by fit()

    def fit(self, X, y):
        """Learn weights and bias for (X, y) by gradient descent on MSE."""
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0
        for _ in range(self.n_iterations):
            residual = np.dot(X, self.weights) + self.bias - y
            # Gradients of the mean-squared error.
            grad_w = np.dot(X.T, residual) / n_samples
            grad_b = np.sum(residual) / n_samples
            self.weights -= self.learning_rate * grad_w
            self.bias -= self.learning_rate * grad_b

    def predict(self, X):
        """Return model predictions X·w + b."""
        return np.dot(X, self.weights) + self.bias
如果您喜欢此文章,请收藏、点赞、评论,谢谢,祝您快乐每一天。







