简介
语言是人类最重要的交际工具,是人类之间进行信息交换的主要表达方式。
编程语言是用来定义计算机程序的语言,用来向计算机发出指令。
Python 语言是一种面向对象的解释型计算机程序设计语言。
数据类型详解
基本数据类型
# 整数 (int)
x = 10
y = -5
z = 0
large_num = 1_000_000 # 使用下划线提高可读性
print(f"类型: {type(x)}, 值: {x}") # <class 'int'>, 10
# 浮点数 (float)
a = 3.14
b = -2.5
c = 1.0
scientific = 1.23e-4 # 科学计数法 0.000123
print(f"类型: {type(a)}, 值: {a}") # <class 'float'>, 3.14
# 复数 (complex)
comp = 2 + 3j
comp2 = complex(4, -2) # 4 - 2j
print(f"类型: {type(comp)}, 实部: {comp.real}, 虚部: {comp.imag}")
# 布尔值 (bool)
is_active = True
is_finished = False
print(f"类型: {type(is_active)}, 值: {is_active}") # <class 'bool'>, True
# 布尔值实际上是整数的子类
print(True == 1) # True
print(False == 0) # True
print(True + 1) # 2
字符串 (String)
# 字符串创建 ' " """都可
s1 = "hello"
s2 = 'world'
s3 = """多行
字符串"""
s4 = r"原始字符串\n不转义" # raw string
s5 = f"格式化字符串 {s1}" # f-string
print(f"类型: {type(s1)}, 值: {s1}")
# 字符串操作
name = "Python"
print(name.upper()) # PYTHON
print(name.lower()) # python
print(name.startswith('P')) # True
print("pro" in "Python") # False
print(len(name)) # 6
# 字符串格式化
age = 25
message = f"我叫{name},今年{age}岁"
print(message) # 我叫Python,今年25岁
空值类型
# None 类型
result = None
nothing = None
print(f"类型: {type(result)}, 值: {result}") # <class 'NoneType'>, None
# 使用场景
def find_user(user_id):
    """Look up a user record by id.

    Args:
        user_id: Numeric id of the user to look up.

    Returns:
        A dict with ``name`` and ``age`` when ``user_id`` is greater than
        100, otherwise ``None`` to signal that no user was found.
    """
    if user_id > 100:
        return {"name": "Alice", "age": 25}
    return None  # not found


# Callers test for "not found" with an identity check against None.
user = find_user(50)
if user is None:
    print("用户不存在")
容器数据类型
列表 (List)
# 列表创建 用[]定义
fruits = ["apple", "banana", "cherry"]
numbers = [1, 2, 3, 4, 5]
mixed = [1, "hello", 3.14, True]
nested = [[1, 2], [3, 4]]
print(f"类型: {type(fruits)}, 值: {fruits}")
# List operations (assumes fruits is still ["apple", "banana", "cherry"] as assigned earlier in this section)
fruits.append("orange") # append an element at the end
fruits.insert(1, "grape") # insert an element at index 1
removed = fruits.pop() # remove and return the last element
fruits.remove("banana") # remove the first occurrence of the given value
print(f"修改后: {fruits}") # out: ['apple', 'grape', 'cherry']
print(f"切片: {fruits[1:3]}") # slicing uses the half-open index range [1, 3), 0-based; out: 切片: ['grape', 'cherry']
print(f"长度: {len(fruits)}") # 3
# 列表推导式
squares = [x**2 for x in range(5)]
print(squares) # [0, 1, 4, 9, 16]
元组 (Tuple)
# 元组创建 - 不可变序列 用()定义
coordinates = (10, 20)
colors = ("red", "green", "blue")
single_element = (42,) # 注意逗号,没有逗号就是整数
mixed_tuple = (1, "hello", [1, 2, 3])
print(f"类型: {type(coordinates)}, 值: {coordinates}")
# 元组操作
print(coordinates[0]) # 10
print(coordinates[1]) # 20
print(len(coordinates)) # 2
# 元组解包
x, y = coordinates
print(f"x: {x}, y: {y}") # x: 10, y: 20
# 元组比列表更节省内存,访问更快
字典 (Dictionary)
# Dictionary creation - a collection of key/value pairs written with {}
person = {
    "name": "Alice",
    "age": 30,
    "city": "New York",
    "hobbies": ["reading", "swimming"],
}
empty_dict = {}
dict_from_list = dict([('a', 1), ('b', 2)])
print(f"类型: {type(person)}, 值: {person}")

# Dictionary operations
print(person["name"])  # Alice
print(person.get("age"))  # 30
print(person.get("country", "未知"))  # 未知 (default returned for a missing key)
person["email"] = "alice@example.com"  # add a new key/value pair
person["age"] = 31  # overwrite an existing value

# Iterate over key/value pairs
for key, value in person.items():
    print(f"{key}: {value}")

# Dictionary comprehension
squares_dict = {x: x**2 for x in range(5)}
print(squares_dict)  # {0: 0, 1: 1, 2: 4, 3: 9, 4: 16}
集合 (Set)
# 集合创建 - 无序不重复元素
fruits_set = {"apple", "banana", "cherry"}
numbers_set = {1, 2, 3, 4, 5}
empty_set = set() # 注意:{} 创建的是空字典
print(f"类型: {type(fruits_set)}, 值: {fruits_set}")
# 集合操作
fruits_set.add("orange") # 添加元素
fruits_set.remove("banana") # 删除元素
fruits_set.discard("grape") # 安全删除(不存在不报错)
set1 = {1, 2, 3}
set2 = {3, 4, 5}
print(set1 | set2) # 并集: {1, 2, 3, 4, 5}
print(set1 & set2) # 交集: {3}
print(set1 - set2) # 差集: {1, 2}
# 集合推导式
even_squares = {x**2 for x in range(10) if x % 2 == 0}
print(even_squares) # {0, 4, 16, 36, 64}
类型转换
# 显式类型转换
num_str = "123"
num_int = int(num_str) # 字符串转整数
num_float = float(num_str) # 字符串转浮点数
str_num = str(123) # 数字转字符串
list_data = [1, 2, 3]
tuple_data = tuple(list_data) # 列表转元组
set_data = set(list_data) # 列表转集合
# 布尔转换
print(bool(0)) # False
print(bool(1)) # True
print(bool("")) # False
print(bool("hello")) # True
print(bool([])) # False
print(bool([1, 2])) # True
标准输出
| 符号 | 描述 | 示例 | 输出 |
|---|---|---|---|
| %s | 字符串 | "%s" % "hello" | hello |
| %d | 十进制整数 | "%d" % 42 | 42 |
| %f | 浮点数 | "%.2f" % 3.14159 | 3.14 |
| %e | 科学计数法 | "%.2e" % 1234 | 1.23e+03 |
| %x | 十六进制 | "%x" % 255 | ff |
| %o | 八进制 | "%o" % 64 | 100 |
| %c | 字符 | "%c" % 65 | A |
| %% | 百分号 | "%.1f%%" % 25.6 | 25.6% |
最佳实践
# 1. Prefer f-strings (Python 3.6+)
name = "Alice"
age = 25
print(f"{name} is {age} years old")  # recommended form: f"{VarName}"

# 2. Use format() for more complex formatting
data = {"x": 10, "y": 20}
print("坐标: ({x}, {y})".format(**data))  # ** unpacks the dict; positional form: "{},{}".format(10, 20)

# 3. Avoid overly complex expressions inside the braces
# Bad: too much logic inside the f-string
print(f"{(lambda x: x**2)(5)}")
# Good: compute first, then format
result = (lambda x: x**2)(5)
print(f"{result}")

# 4. Keep the output readable
# `score` was referenced below without ever being assigned, which raised
# NameError; give it a sample value so the snippet runs.
score = 92.5
# Hard to read: columns run together
print(f"{name:>15}{age:>5}{score:>8.2f}")
# Easy to read: columns separated by a space
print(f"{name:>15} {age:>5} {score:>8.2f}")
标准输入
基本输入
# 最简单的输入
name = input("请输入你的姓名: ")
print(f"你好, {name}!")
# 运行示例:
# 请输入你的姓名: 张三
# 你好, 张三!
不同类型输入的处理
# 输入字符串(默认)
text = input("请输入文本: ")
print(f"你输入的文本是: {text}")
# 输入数字 - 需要类型转换
age = int(input("请输入你的年龄: "))
print(f"明年你就 {age + 1} 岁了")
# 输入浮点数
height = float(input("请输入你的身高(米): "))
print(f"你的身高是: {height} 米")
# 输入布尔值
is_student = input("你是学生吗? (y/n): ").lower() == 'y'
print(f"学生状态: {is_student}")
If逻辑判断
基础if语句
# Basic syntax: a bare `if`
age = 18
if age >= 18:
    print("你已经成年了")
    print("可以参加投票")
# Output: 你已经成年了 / 可以参加投票

# if-else: pick one of two branches
score = 85
if score >= 60:
    print("恭喜,考试及格!")
else:
    print("很遗憾,需要补考")
# Output: 恭喜,考试及格!

# if-elif-else: several conditions, tested top to bottom; the first match wins
score = 85
if score >= 90:
    grade = "A"
    print("优秀!")
elif score >= 80:
    grade = "B"
    print("良好!")
elif score >= 70:
    grade = "C"
    print("中等!")
elif score >= 60:
    grade = "D"
    print("及格!")
else:
    grade = "F"
    print("不及格!")
print(f"你的等级是: {grade}")
# Output: 良好! 你的等级是: B
比较运算符
a = 10
b = 5
print(f"a = {a}, b = {b}")
print(f"a == b: {a == b}") # 等于
print(f"a != b: {a != b}") # 不等于
print(f"a > b: {a > b}") # 大于
print(f"a < b: {a < b}") # 小于
print(f"a >= b: {a >= b}") # 大于等于
print(f"a <= b: {a <= b}") # 小于等于
# 字符串比较
name1 = "Alice"
name2 = "Bob"
print(f"name1 == name2: {name1 == name2}")
print(f"name1 != name2: {name1 != name2}")
out:a = 10, b = 5
a == b: False
a != b: True
a > b: True
a < b: False
a >= b: True
a <= b: False
name1 == name2: False
name1 != name2: True
循环控制
while
基础语句
# Basic syntax: repeat the body while the condition holds
count = 1
while count <= 5:
    print(f"计数: {count}")
    count += 1
print("循环结束")
# Output:
# 计数: 1
# 计数: 2
# 计数: 3
# 计数: 4
# 计数: 5
# 循环结束
无限循环
# Interactive while loop: keep reading numbers until the user types 'quit'
total = 0
count = 0
print("请输入数字,输入 'quit' 结束:")
while True:
    user_input = input("请输入数字: ")
    if user_input.lower() == 'quit':
        break
    # Only the conversion can raise ValueError, so keep the try body minimal.
    try:
        number = float(user_input)
    except ValueError:
        print("请输入有效的数字!")
    else:
        total += number
        count += 1

# Guard against dividing by zero when nothing valid was entered.
if count > 0:
    print(f"总和: {total}, 平均值: {total/count:.2f}")
else:
    print("没有输入有效数字")
while-else 结构
# while-else: 循环正常结束时执行 else
def find_number(target, max_attempts=5):
    """Ask the user to guess ``target``, allowing a limited number of tries.

    Demonstrates ``while``/``else``: the ``else`` suite runs only when the
    loop condition becomes false, i.e. when no ``break`` happened.

    Args:
        target: The integer the user has to guess.
        max_attempts: Maximum number of prompts before giving up.

    Returns:
        ``True`` if the user guessed ``target``, ``False`` otherwise.
    """
    attempts = 0
    found = False
    while attempts < max_attempts:
        attempts += 1
        guess = input(f"第{attempts}次尝试,猜一个数字(1-10): ")
        try:
            value = int(guess)
        except ValueError:
            # Non-numeric input still consumes an attempt.
            print("请输入有效数字")
            continue
        if value == target:
            print("恭喜,猜对了!")
            found = True
            break
        print("猜错了,再试一次")
    else:
        # Reached only when the loop ended without break (attempts used up).
        print(f"很遗憾,{max_attempts}次机会已用完")
    return found
# 使用示例
# find_number(7)
For
基础for语句
# Iterate over a list
fruits = ['apple', 'banana', 'orange', 'grape']
print("水果列表:")
for fruit in fruits:
    print(f" - {fruit}")

# Iterate over the characters of a string
text = "Python"
print("\n字符串字符:")
for char in text:
    print(f" - {char}")

# Iterate over a dictionary's key/value pairs
person = {'name': 'Alice', 'age': 25, 'city': 'Beijing'}
print("\n字典键值对:")
for key, value in person.items():
    print(f" - {key}: {value}")
range()函数
# range() produces a sequence of integers
print("range(5):", list(range(5)))  # [0, 1, 2, 3, 4]
print("range(2, 6):", list(range(2, 6)))  # [2, 3, 4, 5]
print("range(1, 10, 2):", list(range(1, 10, 2)))  # [1, 3, 5, 7, 9]

# A for loop driven by range()
print("\n使用 range 的循环:")
for i in range(5):
    print(f"第{i}次循环")

# Iterate with an index using enumerate()
fruits = ['apple', 'banana', 'orange']
print("\n带索引的遍历:")
for index, fruit in enumerate(fruits):
    print(f"{index}: {fruit}")
for-else结构
# for-else: 循环正常结束时执行 else
def check_prime(number):
    """Report whether ``number`` is prime, printing the verdict.

    Demonstrates ``for``/``else``: the ``else`` suite runs only when the
    loop finishes without hitting ``break`` (no divisor was found).

    Args:
        number: Integer to test.

    Returns:
        ``True`` if ``number`` is prime, ``False`` otherwise. Numbers below
        2 return ``False`` without printing anything.
    """
    if number < 2:
        return False
    # Trial division only needs to go up to the square root.
    for i in range(2, int(number**0.5) + 1):
        if number % i == 0:
            print(f"{number} 不是质数,能被 {i} 整除")
            break
    else:
        # Loop completed normally: no divisor found.
        print(f"{number} 是质数")
        return True
    return False
# 测试质数检查
check_prime(17) # 17 是质数
check_prime(15) # 15 不是质数,能被 3 整除
break
# 在列表中查找元素
def find_item(items, target):
    """Print the position of the first element of ``items`` equal to ``target``.

    Args:
        items: Iterable to search.
        target: Value to look for (compared with ``==``).

    Returns:
        None. The outcome is reported on stdout only.
    """
    for index, item in enumerate(items):
        if item == target:
            print(f"找到 '{target}' 在位置 {index}")
            break  # stop at the first match; skips the else suite
    else:
        # Runs only when the loop was not interrupted by break.
        print(f"没有找到 '{target}'")
# 使用示例
fruits = ['apple', 'banana', 'orange', 'grape']
find_item(fruits, 'orange') # 找到 'orange' 在位置 2
find_item(fruits, 'pear') # 没有找到 'pear'
continue
# 跳过某些迭代
def process_numbers(numbers):
    """Print each positive number in ``numbers`` and their average.

    Zero and negative values are skipped with ``continue``.

    Args:
        numbers: Iterable of numbers.

    Returns:
        None. Results are printed to stdout.
    """
    total = 0
    count = 0
    for num in numbers:
        if num <= 0:
            continue  # skip zero and negatives
        # Only positive numbers reach this point.
        total += num
        count += 1
        print(f"处理正数: {num}")

    # Avoid dividing by zero when no positive number was seen.
    if count > 0:
        print(f"正数平均值: {total/count:.2f}")
    else:
        print("没有正数")
# 使用示例
numbers = [1, -2, 3, 0, 5, -1, 4]
process_numbers(numbers)
字符串操作
索引和切片
text = "Python Programming"
# 索引
print(f"第一个字符: {text[0]}") # P
print(f"最后一个字符: {text[-1]}") # g
print(f"倒数第二个字符: {text[-2]}") # n
# 切片 [start:end:step] 不填就默认
print(f"前6个字符: {text[0:6]}") # Python
print(f"从第7开始: {text[7:]}") # Programming
print(f"每隔一个字符: {text[::2]}") # Pto rgamn
print(f"反转字符串: {text[::-1]}") # gnimmargorP nohtyP
查找和验证
查找
text = "Python programming is fun and Python is powerful"
# Search for a substring
print(f"find 'Python': {text.find('Python')}") # 0
print(f"find 'Java': {text.find('Java')}") # -1 (not found)
print(f"rfind 'Python': {text.rfind('Python')}") # 30 (searches from the right; index of the last occurrence)
# index() behaves like find() but raises when the substring is missing
print(f"index 'fun': {text.index('fun')}") # 22
# print(f"index 'Java': {text.index('Java')}") # ValueError
# Count non-overlapping occurrences
print(f"count 'Python': {text.count('Python')}") # 2
print(f"count 'is': {text.count('is')}") # 2
验证
# Exercise the str predicate methods on a range of sample strings
test_strings = [
    "Hello123",     # letters and digits
    "HELLO",        # all upper case
    "hello",        # all lower case
    "12345",        # digits only
    "Hello World",  # contains a space
    "Hello123!",    # contains a special character
    " ",            # whitespace only
    "",             # empty string
]
for s in test_strings:
    print(f"'{s}':")
    print(f" isalpha(): {s.isalpha()}")  # letters only?
    print(f" isdigit(): {s.isdigit()}")  # digits only?
    print(f" isalnum(): {s.isalnum()}")  # letters/digits only?
    print(f" isupper(): {s.isupper()}")  # all cased characters upper case?
    print(f" islower(): {s.islower()}")  # all cased characters lower case?
    print(f" isspace(): {s.isspace()}")  # whitespace only?
    print(f" startswith('H'): {s.startswith('H')}")  # starts with H?
    print(f" endswith('!'): {s.endswith('!')}")  # ends with !?
    print()
字符串修改方法
大小写转换
text = "Hello Python WORLD"
print(f"原始: {text}")
print(f"大写: {text.upper()}") # HELLO PYTHON WORLD
print(f"小写: {text.lower()}") # hello python world
print(f"首字母大写: {text.capitalize()}") # Hello python world
print(f"每个单词首字母大写: {text.title()}") # Hello Python World
print(f"大小写交换: {text.swapcase()}") # hELLO pYTHON world
字符串修整和填充
# 去除空白字符
messy_text = " Hello World! \n\t"
print(f"原始: '{messy_text}'")
print(f"strip(): '{messy_text.strip()}'") # 去除两端空白
print(f"lstrip(): '{messy_text.lstrip()}'") # 去除左端空白
print(f"rstrip(): '{messy_text.rstrip()}'") # 去除右端空白
# 字符串填充
text = "Python"
print(f"原始: '{text}'")
print(f"居中: '{text.center(20)}'") # 居中填充
print(f"左对齐: '{text.ljust(20)}'") # 左对齐填充
print(f"右对齐: '{text.rjust(20)}'") # 右对齐填充
print(f"零填充: '{text.zfill(10)}'") # 零填充
print(f"自定义填充: '{text.center(20, '*')}'") # 使用*填充
替换和翻译
# 替换
text = "I like cats. Cats are cute. I have two cats."
print(f"原始: {text}")
print(f"替换 'cats' 为 'dogs': {text.replace('cats', 'dogs')}")
print(f"只替换一次: {text.replace('cats', 'dogs', 1)}")
# 翻译 (字符映射)
translation_table = str.maketrans('aeiou', '12345')
text = "hello world"
print(f"原始: {text}")
print(f"翻译后: {text.translate(translation_table)}")
# 删除特定字符
remove_digits = str.maketrans('', '', '0123456789')
text = "Hello123 World456"
print(f"删除数字: {text.translate(remove_digits)}")
字符串分割和连接
分割方法
# 各种分割方式
text = "apple,banana,orange,grape,melon"
csv_data = "John,25,Developer,New York"
sentence = "This is a sample sentence for demonstration"
print("基本分割:")
print(f"split(','): {text.split(',')}")
print(f"split(',', 2): {text.split(',', 2)}") # 最多分割2次
print("\nrsplit (从右边分割):")
print(f"rsplit(',', 2): {text.rsplit(',', 2)}")
print("\n按行分割:")
multiline_text = "Line 1\nLine 2\nLine 3\nLine 4"
print(f"splitlines(): {multiline_text.splitlines()}")
print("\n按空格分割:")
print(f"split(): {sentence.split()}")
print(f"split(' '): {sentence.split(' ')}")
print("\n分区分割 (partition):")
url = "https://www.example.com/path/to/page"
print(f"partition('://'): {url.partition('://')}")
print(f"rpartition('/'): {url.rpartition('/')}")
连接方法
# str.join(): concatenate the items of an iterable with a separator
fruits = ['apple', 'banana', 'orange', 'grape']
print(f"列表: {fruits}")
print(f"用逗号连接: {', '.join(fruits)}")
print(f"用横线连接: {'-'.join(fruits)}")
# A backslash inside an f-string replacement field is a SyntaxError before
# Python 3.12, so build the newline-joined text outside the f-string.
joined_lines = '\n'.join(fruits)
print(f"用换行符连接:\n{joined_lines}")

# join() requires strings, so convert numbers first
numbers = [1, 2, 3, 4, 5]
print(f"数字列表: {numbers}")
print(f"连接数字: {', '.join(map(str, numbers))}")

# Join the keys or the values of a dict (all values here are strings)
person = {'name': 'Alice', 'age': '25', 'city': 'Beijing'}
print(f"连接键: {', '.join(person.keys())}")
print(f"连接值: {', '.join(person.values())}")
编码操作
# Encoding a str into bytes
text = "Hello Python! 你好 Python!"

# Try several codecs; those that cannot represent the Chinese characters
# raise UnicodeEncodeError, which is reported instead of aborting the loop.
encodings = ['utf-8', 'gbk', 'ascii', 'latin-1']
for encoding in encodings:
    try:
        encoded = text.encode(encoding)
        print(f"{encoding}: {encoded} (长度: {len(encoded)})")
    except UnicodeEncodeError as e:
        print(f"{encoding}: 编码错误 - {e}")

# Decoding bytes back into a str
bytes_data = b'Hello World'
print(f"解码: {bytes_data.decode('utf-8')}")

# Handling characters the target codec cannot represent
problem_text = "café"
try:
    encoded = problem_text.encode('ascii')
except UnicodeEncodeError:
    # errors='replace' substitutes '?' for each unencodable character
    encoded = problem_text.encode('ascii', errors='replace')
    print(f"替换错误字符: {encoded}")
    # errors='ignore' silently drops unencodable characters
    encoded = problem_text.encode('ascii', errors='ignore')
    print(f"忽略错误字符: {encoded}")
列表操作详解
列表创建和基本操作
列表创建
# 空列表
empty_list = []
empty_list2 = list()
# 包含元素的列表
numbers = [1, 2, 3, 4, 5]
fruits = ['apple', 'banana', 'orange']
mixed = [1, 'hello', 3.14, True]
nested = [[1, 2], [3, 4], [5, 6]]
# 使用 list() 构造函数
from_string = list("hello") # ['h', 'e', 'l', 'l', 'o']
from_range = list(range(5)) # [0, 1, 2, 3, 4]
print("创建的列表:")
print(f"numbers: {numbers}")
print(f"fruits: {fruits}")
print(f"mixed: {mixed}")
print(f"from_string: {from_string}")
列表索引和切片
fruits = ['apple', 'banana', 'orange', 'grape', 'mango']
# 正索引
print(f"fruits[0]: {fruits[0]}") # apple
print(f"fruits[2]: {fruits[2]}") # orange
# 负索引
print(f"fruits[-1]: {fruits[-1]}") # mango (最后一个)
print(f"fruits[-2]: {fruits[-2]}") # grape (倒数第二个)
# 切片
print(f"fruits[1:4]: {fruits[1:4]}") # ['banana', 'orange', 'grape']
print(f"fruits[:3]: {fruits[:3]}") # ['apple', 'banana', 'orange']
print(f"fruits[2:]: {fruits[2:]}") # ['orange', 'grape', 'mango']
print(f"fruits[::2]: {fruits[::2]}") # ['apple', 'orange', 'mango'] (每隔一个)
print(f"fruits[::-1]: {fruits[::-1]}") # 反转列表
列表修改方法
添加元素
fruits = ['apple', 'banana']
# append() - 在末尾添加单个元素
fruits.append('orange')
print(f"append后: {fruits}") # ['apple', 'banana', 'orange']
# extend() - 添加多个元素(合并列表)
fruits.extend(['grape', 'mango'])
print(f"extend后: {fruits}") # ['apple', 'banana', 'orange', 'grape', 'mango']
# insert() - 在指定位置插入
fruits.insert(1, 'pear')
print(f"insert后: {fruits}") # ['apple', 'pear', 'banana', 'orange', 'grape', 'mango']
# + 运算符 - 连接列表
new_fruits = fruits + ['pineapple', 'watermelon']
print(f"+运算后: {new_fruits}")
删除元素
# Removing elements from a list
# (the stray bare `python` line that preceded this snippet was code-fence
# residue and raised NameError when executed)
fruits = ['apple', 'banana', 'orange', 'grape', 'mango', 'banana']

# remove() - delete the first matching element
fruits.remove('banana')
print(f"remove后: {fruits}")  # ['apple', 'orange', 'grape', 'mango', 'banana']

# pop(i) - delete and return the element at the given index
removed = fruits.pop(1)  # removes 'orange'
print(f"pop后: {fruits}, 删除的元素: {removed}")

# pop() without an index - delete and return the last element
last = fruits.pop()  # removes the trailing 'banana'
print(f"pop()后: {fruits}, 删除的元素: {last}")

# del statement - delete by index
del fruits[0]
print(f"del后: {fruits}")  # ['grape', 'mango']

# clear() - empty the list in place
fruits.clear()
print(f"clear后: {fruits}")  # []
修改元素
# Modifying list elements
# (the stray bare `python` line that preceded this snippet was code-fence
# residue and raised NameError when executed)
numbers = [1, 2, 3, 4, 5]

# Assign to an index to replace a single element
numbers[0] = 10
numbers[-1] = 50
print(f"修改后: {numbers}")  # [10, 2, 3, 4, 50]

# Slice assignment replaces several elements at once
numbers[1:4] = [20, 30, 40]
print(f"切片修改后: {numbers}")  # [10, 20, 30, 40, 50]

# The replacement may have a different length than the slice it replaces
numbers[1:3] = [200, 300, 400]
print(f"替换后: {numbers}")  # [10, 200, 300, 400, 40, 50]
列表查询和统计
查找元素
# Searching a list
# (the stray bare `python` line that preceded this snippet was code-fence
# residue and raised NameError when executed)
fruits = ['apple', 'banana', 'orange', 'grape', 'mango', 'banana']

# index() - position of the first occurrence
print(f"banana的索引: {fruits.index('banana')}")  # 1
print(f"grape的索引: {fruits.index('grape')}")  # 3

# index(value, start) - begin the search at a given index
print(f"从索引2开始找banana: {fruits.index('banana', 2)}")  # 5

# count() - number of occurrences
print(f"banana出现次数: {fruits.count('banana')}")  # 2
print(f"apple出现次数: {fruits.count('apple')}")  # 1

# in - membership test
print(f"'orange'在列表中: {'orange' in fruits}")  # True
print(f"'pear'在列表中: {'pear' in fruits}")  # False
列表信息
# Summary information about a list
# (the stray bare `python` line that preceded this snippet was code-fence
# residue and raised NameError when executed)
numbers = [5, 2, 8, 1, 9, 3]

# Length
print(f"列表长度: {len(numbers)}")

# Largest and smallest element
print(f"最大值: {max(numbers)}")
print(f"最小值: {min(numbers)}")

# Sum of all elements
print(f"总和: {sum(numbers)}")

# any(): at least one truthy element; all(): every element truthy
bool_list = [True, False, True]
print(f"any: {any(bool_list)}")  # True
print(f"all: {all(bool_list)}")  # False
列表排序和反转
排序操作
numbers = [5, 2, 8, 1, 9, 3]
fruits = ['banana', 'apple', 'orange', 'grape']
# sort() - 原地排序(修改原列表)
numbers.sort()
print(f"sort()后: {numbers}") # [1, 2, 3, 5, 8, 9]
# 降序排序
numbers.sort(reverse=True)
print(f"降序排序: {numbers}") # [9, 8, 5, 3, 2, 1]
# 字符串排序
fruits.sort()
print(f"水果排序: {fruits}") # ['apple', 'banana', 'grape', 'orange']
# sorted() - 返回新列表(不修改原列表)
original = [5, 2, 8, 1, 9]
sorted_list = sorted(original)
print(f"原列表: {original}") # [5, 2, 8, 1, 9]
print(f"排序后: {sorted_list}") # [1, 2, 5, 8, 9]
反转操作
# Reversing a list
# (the stray bare `python` line that preceded this snippet was code-fence
# residue and raised NameError when executed)
numbers = [1, 2, 3, 4, 5]

# reverse() - reverse in place (returns None)
numbers.reverse()
print(f"reverse()后: {numbers}")  # [5, 4, 3, 2, 1]

# reversed() - returns an iterator; the original list is untouched
original = [1, 2, 3, 4, 5]
reversed_list = list(reversed(original))
print(f"原列表: {original}")  # [1, 2, 3, 4, 5]
print(f"反转后: {reversed_list}")  # [5, 4, 3, 2, 1]

# Slicing with a step of -1 also builds a reversed copy
print(f"切片反转: {original[::-1]}")  # [5, 4, 3, 2, 1]
列表复制
浅拷贝 vs 深拷贝
# Reference vs. shallow copy vs. deep copy
# (the stray bare `python` line that preceded this snippet was code-fence
# residue and raised NameError when executed)
import copy

# Plain assignment: both names refer to the same list object
original = [1, 2, 3]
reference = original
reference[0] = 100
print(f"original: {original}")  # [100, 2, 3] - the original changed too!

# Shallow copy: a new outer list whose items are shared with the original
original = [1, 2, [3, 4]]
shallow_copy = original.copy()  # same as list(original) or original[:]
shallow_copy[0] = 100  # top-level change: original unaffected
shallow_copy[2][0] = 300  # nested list is shared: original changes too!
print(f"原列表: {original}")  # [1, 2, [300, 4]]
print(f"浅拷贝: {shallow_copy}")  # [100, 2, [300, 4]]

# Deep copy: nested objects are copied recursively
original = [1, 2, [3, 4]]
deep_copy = copy.deepcopy(original)
deep_copy[0] = 100
deep_copy[2][0] = 300
print(f"原列表: {original}")  # [1, 2, [3, 4]] - unaffected
print(f"深拷贝: {deep_copy}")  # [100, 2, [300, 4]]
元组操作详解
元组创建和基本特性
元组创建
# 空元组
empty_tuple = ()
empty_tuple2 = tuple()
# 单元素元组(必须有逗号)
single_item = (42,) # 注意逗号!
not_a_tuple = (42) # 这只是整数42
# 多元素元组
numbers = (1, 2, 3, 4, 5)
fruits = ('apple', 'banana', 'orange')
mixed = (1, 'hello', 3.14, True)
# 使用 tuple() 构造函数
from_list = tuple([1, 2, 3]) # (1, 2, 3)
from_string = tuple("hello") # ('h', 'e', 'l', 'l', 'o')
from_range = tuple(range(5)) # (0, 1, 2, 3, 4)
print("创建的元组:")
print(f"empty_tuple: {empty_tuple}")
print(f"single_item: {single_item} (类型: {type(single_item)})")
print(f"not_a_tuple: {not_a_tuple} (类型: {type(not_a_tuple)})")
print(f"numbers: {numbers}")
print(f"from_string: {from_string}")
元组的不可变性
# 元组创建后不能修改
fruits = ('apple', 'banana', 'orange')
# 这些操作都会报错!
# fruits[0] = 'pear' # TypeError
# fruits.append('grape') # AttributeError
# fruits.remove('banana') # AttributeError
print("元组不可变性演示:")
print(f"原始元组: {fruits}")
# 但可以重新赋值
fruits = ('pear', 'grape', 'mango')
print(f"重新赋值后: {fruits}")
# 如果元组包含可变对象,这些对象本身可以修改
nested_tuple = (1, 2, [3, 4])
print(f"嵌套元组: {nested_tuple}")
nested_tuple[2].append(5) # 可以修改列表!
print(f"修改后: {nested_tuple}") # (1, 2, [3, 4, 5])
元组索引和切片
索引访问
fruits = ('apple', 'banana', 'orange', 'grape', 'mango')
print("=== 元组索引 ===")
print(f"fruits: {fruits}")

# Positive indexes count from the front
print(f"fruits[0]: {fruits[0]}")  # apple
print(f"fruits[2]: {fruits[2]}")  # orange

# Negative indexes count from the end
print(f"fruits[-1]: {fruits[-1]}")  # mango (last element)
print(f"fruits[-2]: {fruits[-2]}")  # grape (second to last)

# Item assignment on a tuple raises TypeError
try:
    fruits[0] = 'pear'
except TypeError as e:
    print(f"错误: {e}")
切片操作
numbers = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)
print("=== 元组切片 ===")
print(f"numbers: {numbers}")
# 基本切片
print(f"numbers[2:6]: {numbers[2:6]}") # (2, 3, 4, 5)
print(f"numbers[:4]: {numbers[:4]}") # (0, 1, 2, 3)
print(f"numbers[5:]: {numbers[5:]}") # (5, 6, 7, 8, 9)
# 步长切片
print(f"numbers[::2]: {numbers[::2]}") # (0, 2, 4, 6, 8)
print(f"numbers[1::2]: {numbers[1::2]}") # (1, 3, 5, 7, 9)
# 负步长(反转)
print(f"numbers[::-1]: {numbers[::-1]}") # (9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
# 切片返回新元组
slice_result = numbers[1:4]
print(f"切片类型: {type(slice_result)}") # <class 'tuple'>
元组查询和统计
查找元素
fruits = ('apple', 'banana', 'orange', 'grape', 'mango', 'banana')
print("=== 元组查找操作 ===")
print(f"fruits: {fruits}")
# index() - 查找元素第一次出现的索引
print(f"banana的索引: {fruits.index('banana')}") # 1
print(f"grape的索引: {fruits.index('grape')}") # 3
# 指定范围查找
print(f"从索引2开始找banana: {fruits.index('banana', 2)}") # 5
# count() - 统计元素出现次数
print(f"banana出现次数: {fruits.count('banana')}") # 2
print(f"apple出现次数: {fruits.count('apple')}") # 1
# in 运算符 - 检查元素是否存在
print(f"'orange'在元组中: {'orange' in fruits}") # True
print(f"'pear'在元组中: {'pear' in fruits}") # False
# not in 运算符
print(f"'pear'不在元组中: {'pear' not in fruits}") # True
元组信息
numbers = (5, 2, 8, 1, 9, 3)
print("=== 元组信息 ===")
print(f"numbers: {numbers}")
# 长度
print(f"元组长度: {len(numbers)}")
# 最大值和最小值
print(f"最大值: {max(numbers)}")
print(f"最小值: {min(numbers)}")
# 求和
print(f"总和: {sum(numbers)}")
# 任何和所有元素为True
bool_tuple = (True, False, True)
print(f"any: {any(bool_tuple)}") # 至少一个为True
print(f"all: {all(bool_tuple)}") # 所有都为True
元组排序和反转
排序操作
numbers = (5, 2, 8, 1, 9, 3)
fruits = ('banana', 'apple', 'orange', 'grape')
print("=== 元组排序 ===")
# sorted() - 返回新列表(元组不可变,不能原地排序)
sorted_numbers = sorted(numbers)
sorted_fruits = sorted(fruits)
print(f"原元组: {numbers}")
print(f"排序后列表: {sorted_numbers}") # [1, 2, 3, 5, 8, 9]
print(f"排序类型: {type(sorted_numbers)}")
print(f"原水果元组: {fruits}")
print(f"排序后水果: {sorted_fruits}") # ['apple', 'banana', 'grape', 'orange']
# 降序排序
descending = sorted(numbers, reverse=True)
print(f"降序排序: {descending}") # [9, 8, 5, 3, 2, 1]
# 转回元组
sorted_tuple = tuple(sorted(numbers))
print(f"排序后元组: {sorted_tuple}") # (1, 2, 3, 5, 8, 9)
反转操作
numbers = (1, 2, 3, 4, 5)
print("=== 元组反转 ===")
print(f"原元组: {numbers}")
# reversed() - 返回反转迭代器
reversed_iter = reversed(numbers)
reversed_tuple = tuple(reversed_iter)
print(f"反转后: {reversed_tuple}") # (5, 4, 3, 2, 1)
# 使用切片反转
slice_reversed = numbers[::-1]
print(f"切片反转: {slice_reversed}") # (5, 4, 3, 2, 1)
# 两种方法比较
print(f"reversed() 类型: {type(reversed(numbers))}") # <class 'reversed'>
print(f"切片反转类型: {type(slice_reversed)}") # <class 'tuple'>
元组拆包
基本拆包
# 基本拆包
person = ('Alice', 25, 'Engineer')
name, age, job = person
print(f"姓名: {name}, 年龄: {age}, 职业: {job}")
# 坐标示例
point = (10, 20)
x, y = point
print(f"坐标: x={x}, y={y}")
# RGB颜色
color = (255, 128, 0)
red, green, blue = color
print(f"颜色: R={red}, G={green}, B={blue}")
高级拆包技巧
# 使用 * 收集剩余元素
numbers = (1, 2, 3, 4, 5, 6, 7, 8, 9)
first, *middle, last = numbers
print(f"first: {first}") # 1
print(f"middle: {middle}") # [2, 3, 4, 5, 6, 7, 8]
print(f"last: {last}") # 9
# 取出前两个元素,其余用 * 收集(一次赋值中只能有一个带 * 的目标)
first, second, *rest = numbers
print(f"first: {first}, second: {second}, rest: {rest}")
# 忽略某些元素
name, _, job = ('Bob', 30, 'Designer') # 使用 _ 忽略年龄
print(f"姓名: {name}, 职业: {job}")
# 交换变量
a, b = 10, 20
a, b = b, a # 交换
print(f"a={a}, b={b}") # a=20, b=10
函数返回多值
def get_statistics(numbers):
    """Return ``(minimum, maximum, mean)`` of ``numbers`` as a tuple.

    Args:
        numbers: Non-empty sized collection of numbers (``len`` is used).

    Raises:
        ValueError: If ``numbers`` is empty (raised by ``min``).
    """
    return min(numbers), max(numbers), sum(numbers) / len(numbers)
def get_coordinates():
    """Return a fixed sample coordinate as an ``(x, y, z)`` tuple."""
    return 100, 200, 50  # x, y, z
# 拆包函数返回值
data = (5, 10, 15, 20, 25)
min_val, max_val, avg_val = get_statistics(data)
print(f"最小值: {min_val}, 最大值: {max_val}, 平均值: {avg_val:.2f}")
# 直接拆包
x, y, z = get_coordinates()
print(f"坐标: x={x}, y={y}, z={z}")
元组与列表的比较
性能比较
import time
import sys
def performance_comparison(size=1_000_000, accesses=1000):
    """Print a rough memory/time comparison between a list and a tuple.

    Args:
        size: Number of integers stored in each container.
        accesses: Number of leading elements read in the access timing
            loop; capped at ``size`` so indexing never goes out of range.

    Returns:
        None. All measurements are printed to stdout.
    """
    accesses = min(accesses, size)

    # Build the test data
    list_data = list(range(size))
    tuple_data = tuple(range(size))
    print("=== 性能比较 ===")

    # Memory footprint (sys.getsizeof is shallow: container only)
    list_size = sys.getsizeof(list_data)
    tuple_size = sys.getsizeof(tuple_data)
    print(f"列表内存占用: {list_size} 字节")
    print(f"元组内存占用: {tuple_size} 字节")
    print(f"元组比列表节省: {list_size - tuple_size} 字节")

    # Creation time. perf_counter() is used for measuring short intervals.
    start = time.perf_counter()
    list_data = list(range(size))
    list_time = time.perf_counter() - start
    start = time.perf_counter()
    tuple_data = tuple(range(size))
    tuple_time = time.perf_counter() - start
    print(f"列表创建时间: {list_time:.6f}秒")
    print(f"元组创建时间: {tuple_time:.6f}秒")

    # Indexed access time
    start = time.perf_counter()
    for i in range(accesses):
        _ = list_data[i]
    list_access = time.perf_counter() - start
    start = time.perf_counter()
    for i in range(accesses):
        _ = tuple_data[i]
    tuple_access = time.perf_counter() - start
    print(f"列表访问时间: {list_access:.6f}秒")
    print(f"元组访问时间: {tuple_access:.6f}秒")
performance_comparison()
使用场景比较
def use_case_demonstration():
    """Print four short examples of when to pick a tuple or a list."""
    # Case 1: fixed collections of values -> tuple
    DAYS_OF_WEEK = ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday')
    RGB_COLORS = ('Red', 'Green', 'Blue')
    print("固定数据集合:")
    print(f"星期: {DAYS_OF_WEEK}")
    print(f"RGB颜色: {RGB_COLORS}")

    # Case 2: returning several values from a function -> tuple
    def get_circle_info(radius):
        """Return ``(radius, circumference, area)`` for a circle."""
        from math import pi
        circumference = 2 * pi * radius
        area = pi * radius ** 2
        return radius, circumference, area  # packed into a tuple

    radius, circ, area = get_circle_info(5)
    print(f"\n圆信息 - 半径: {radius}, 周长: {circ:.2f}, 面积: {area:.2f}")

    # Case 3: dictionary keys -> tuple (lists are unhashable)
    coordinates_map = {
        (0, 0): '原点',
        (1, 1): '第一象限',
        (-1, -1): '第三象限',
    }
    print(f"\n坐标映射: {coordinates_map}")

    # Case 4: data that has to change -> list
    shopping_list = ['apple', 'banana', 'milk']
    shopping_list.append('bread')
    print(f"\n购物清单: {shopping_list}")
use_case_demonstration()
元组推导式
生成器表达式
# 元组没有直接的推导式,但可以使用生成器表达式
numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
# 生成器表达式创建元组
even_squares = tuple(x**2 for x in numbers if x % 2 == 0)
print(f"偶数平方元组: {even_squares}") # (4, 16, 36, 64, 100)
# 复杂生成器表达式
words = ('hello', 'world', 'python', 'programming')
result = tuple(word.upper() for word in words if len(word) > 5)
print(f"长单词大写: {result}") # ('PYTHON', 'PROGRAMMING')
# 多个条件的生成器
numbers = range(1, 11)
filtered = tuple(x for x in numbers if x % 2 == 0 and x > 5)
print(f"大于5的偶数: {filtered}") # (6, 8, 10)
元组方法总结
可用方法演示
def tuple_methods_demo():
    """Print examples of the two tuple methods, ``count`` and ``index``."""
    sample_tuple = (1, 2, 3, 2, 4, 2, 5)
    print("=== 元组方法 ===")
    print(f"示例元组: {sample_tuple}")

    # count(): number of occurrences of a value
    count_2 = sample_tuple.count(2)
    print(f"count(2): {count_2}")  # 3

    # index(): position of the first occurrence
    first_2 = sample_tuple.index(2)
    print(f"index(2): {first_2}")  # 1

    # index(value, start): begin searching at index `start`
    second_2 = sample_tuple.index(2, 2)
    print(f"index(2, 2): {second_2}")  # 3

    # index(value, start, stop): search indexes start..stop-1 (stop excluded)
    third_2 = sample_tuple.index(2, 4, 6)
    print(f"index(2, 4, 6): {third_2}")  # 5
tuple_methods_demo()
实际应用案例
数据库记录处理
def process_database_records():
    """Print a simulated user table plus age statistics.

    Returns:
        The list of user rows; each row is an
        ``(id, name, email, age)`` tuple.
    """
    # Simulated query result: one tuple per row
    users = [
        (1, 'Alice', 'alice@email.com', 25),
        (2, 'Bob', 'bob@email.com', 30),
        (3, 'Charlie', 'charlie@email.com', 35),
        (4, 'Diana', 'diana@email.com', 28),
    ]
    print("=== 用户数据库记录 ===")
    # Unpack each row directly in the loop header
    for user_id, name, email, age in users:
        print(f"ID: {user_id}, 姓名: {name}, 邮箱: {email}, 年龄: {age}")

    # Statistics over the age column (index 3)
    ages = tuple(user[3] for user in users)
    avg_age = sum(ages) / len(ages)
    print(f"\n平均年龄: {avg_age:.1f}")
    print(f"最年长: {max(ages)}岁")
    print(f"最年轻: {min(ages)}岁")
    return users
# 使用示例
users_data = process_database_records()
坐标系统
class PointSystem:
    """A small collection of 3-D points stored as ``(x, y, z)`` tuples."""

    def __init__(self):
        # Points in insertion order; each entry is an (x, y, z) tuple.
        self.points = []

    def add_point(self, x, y, z=0):
        """Append the point ``(x, y, z)``; ``z`` defaults to 0."""
        self.points.append((x, y, z))

    def calculate_distances(self):
        """Return a tuple of ``(point, distance_to_origin)`` pairs.

        The pairs follow the insertion order of the points.
        """
        from math import sqrt
        return tuple(
            ((x, y, z), sqrt(x**2 + y**2 + z**2))
            for x, y, z in self.points
        )

    def find_closest_point(self):
        """Return the ``(point, distance)`` pair nearest to the origin.

        Returns ``None`` when no points have been added. On ties the
        earliest added point wins (``min`` keeps the first minimum).
        """
        distances = self.calculate_distances()
        if not distances:
            return None
        return min(distances, key=lambda pair: pair[1])

    def get_bounding_box(self):
        """Return ``(min_point, max_point)`` of the axis-aligned bounding box.

        Returns ``None`` when no points have been added.
        """
        if not self.points:
            return None
        # Transpose the list of points into per-axis coordinate tuples.
        xs, ys, zs = zip(*self.points)
        min_point = (min(xs), min(ys), min(zs))
        max_point = (max(xs), max(ys), max(zs))
        return min_point, max_point
# 使用示例
coord_system = PointSystem()
coord_system.add_point(1, 2, 3)
coord_system.add_point(4, 5, 6)
coord_system.add_point(0, 0, 0)
coord_system.add_point(2, 3, 1)
print("=== 坐标系统 ===")
print(f"所有点: {coord_system.points}")
print(f"距离计算: {coord_system.calculate_distances()}")
print(f"最近点: {coord_system.find_closest_point()}")
print(f"边界框: {coord_system.get_bounding_box()}")
配置系统
class Configuration:
    """Application settings stored in tuples so they cannot be edited in place."""

    # Database settings: (host, port, database, username, password)
    # NOTE: sample credentials for the tutorial only; real secrets should
    # not be hard-coded in source.
    DATABASE = (
        'localhost',    # host
        5432,           # port
        'myapp',        # database
        'admin',        # username
        'password123',  # password
    )

    # API settings: (base URL, version, timeout, retries)
    API_CONFIG = (
        'https://api.example.com',
        'v1',
        30,  # timeout
        3,   # retries
    )

    # Feature switches, addressed by position
    FEATURE_FLAGS = (
        True,   # 0: enable_logging
        False,  # 1: enable_debug
        True,   # 2: enable_cache
        True,   # 3: enable_analytics
    )

    @classmethod
    def get_database_config(cls):
        """Return the database settings as a dict keyed by field name."""
        host, port, db, user, pwd = cls.DATABASE
        return {
            'host': host,
            'port': port,
            'database': db,
            'username': user,
            'password': pwd,
        }

    @classmethod
    def is_feature_enabled(cls, feature_index):
        """Return the flag stored at ``feature_index`` in ``FEATURE_FLAGS``.

        Raises:
            IndexError: If ``feature_index`` is out of range.
        """
        return cls.FEATURE_FLAGS[feature_index]
# 使用配置
print("=== 系统配置 ===")
db_config = Configuration.get_database_config()
print(f"数据库配置: {db_config}")
print(f"日志功能启用: {Configuration.is_feature_enabled(0)}")
print(f"调试功能启用: {Configuration.is_feature_enabled(1)}")
# 元组不可变:尝试原地修改配置项会抛出 TypeError,原配置保持不变
# Configuration.DATABASE[0] = 'new_host' # 这会报错!
字典操作详解
字典创建和基本特性
字典创建
# 空字典
empty_dict = {}
empty_dict2 = dict()
# 直接创建
person = {
'name': 'Alice',
'age': 25,
'city': 'Beijing'
}
# 使用 dict() 构造函数
person2 = dict(name='Bob', age=30, city='Shanghai')
person3 = dict([('name', 'Charlie'), ('age', 35), ('city', 'Guangzhou')])
# 字典推导式
squares = {x: x**2 for x in range(1, 6)}
print(f"squares: {squares}") # {1: 1, 2: 4, 3: 9, 4: 16, 5: 25}
print("创建的字典:")
print(f"person: {person}")
print(f"person2: {person2}")
print(f"person3: {person3}")
字典的特性
# Dictionary keys must be hashable; the built-in immutable types below qualify
valid_keys = {
    'string': 'value1',  # str
    123: 'value2',       # int
    3.14: 'value3',      # float
    (1, 2): 'value4',    # tuple
    True: 'value5',      # bool
}
# These raise TypeError:
# invalid = {[1,2]: 'value'}  # list is unhashable
# invalid = {{}: 'value'}     # dict is unhashable
print("有效的键类型:")
for key, value in valid_keys.items():
    print(f" {type(key).__name__}: {key} -> {value}")
字典访问和修改
基本访问
person = {'name': 'Alice', 'age': 25, 'city': 'Beijing'}
# 使用键访问
print(f"姓名: {person['name']}") # Alice
print(f"年龄: {person['age']}") # 25
# 使用 get() 方法(安全访问)
print(f"城市: {person.get('city')}") # Beijing
print(f"职业: {person.get('job')}") # None
print(f"职业: {person.get('job', '未知')}") # 未知
# 检查键是否存在
print(f"有name键: {'name' in person}") # True
print(f"有job键: {'job' in person}") # False
修改字典
person = {'name': 'Alice', 'age': 25}
# 添加/修改键值对
person['city'] = 'Beijing' # 添加
person['age'] = 26 # 修改
print(f"修改后: {person}")
# 使用 update() 批量更新
person.update({'age': 27, 'job': 'Engineer', 'salary': 50000})
print(f"批量更新后: {person}")
# 使用 setdefault() - 如果键不存在则设置默认值
current_age = person.setdefault('age', 30) # 已存在,返回当前值
current_country = person.setdefault('country', 'China') # 不存在,设置并返回
print(f"setdefault后: {person}")
print(f"年龄: {current_age}, 国家: {current_country}")
字典删除操作
删除元素
person = {'name': 'Alice', 'age': 25, 'city': 'Beijing', 'job': 'Engineer'}
print("原始字典:", person)

# pop() - remove the given key and return its value
age = person.pop('age')
print(f"删除年龄: {age}, 剩余: {person}")

# popitem() - remove and return the most recently inserted pair
# (dicts keep insertion order from Python 3.7 on)
last_item = person.popitem()
print(f"删除最后一项: {last_item}, 剩余: {person}")

# del statement
del person['city']
print(f"del删除后: {person}")

# clear() - empty the dictionary in place
person.clear()
print(f"清空后: {person}")  # {}

# Deleting a missing key raises KeyError
try:
    del person['nonexistent']
except KeyError as e:
    print(f"错误: {e}")
字典遍历
各种遍历方式
person = {'name': 'Alice', 'age': 25, 'city': 'Beijing', 'job': 'Engineer'}
print("=== 字典遍历 ===")

# Iterating a dict directly yields its keys
print("所有键:")
for key in person:
    print(f" - {key}")

# Iterate over the values
print("\n所有值:")
for value in person.values():
    print(f" - {value}")

# Iterate over key/value pairs
print("\n所有键值对:")
for key, value in person.items():
    print(f" {key}: {value}")

# Number the pairs with enumerate()
print("\n带索引的遍历:")
for i, (key, value) in enumerate(person.items()):
    print(f" {i}. {key}: {value}")
字典推导式
# 基本字典推导式
numbers = [1, 2, 3, 4, 5]
squares = {x: x**2 for x in numbers}
print(f"平方字典: {squares}") # {1: 1, 2: 4, 3: 9, 4: 16, 5: 25}
# 带条件的字典推导式
even_squares = {x: x**2 for x in numbers if x % 2 == 0}
print(f"偶数平方: {even_squares}") # {2: 4, 4: 16}
# 键值转换
fruits = {'apple': 1, 'banana': 2, 'orange': 3}
swapped = {value: key for key, value in fruits.items()}
print(f"键值交换: {swapped}") # {1: 'apple', 2: 'banana', 3: 'orange'}
# 复杂推导式
text = "hello world"
char_count = {char: text.count(char) for char in set(text) if char != ' '}
print(f"字符统计: {char_count}")
字典合并和更新
合并字典
dict1 = {'a': 1, 'b': 2}
dict2 = {'b': 3, 'c': 4}
dict3 = {'d': 5, 'e': 6}
print("=== 字典合并 ===")
# 方法1: update() - 原地修改
dict1_copy = dict1.copy()
dict1_copy.update(dict2)
print(f"update合并: {dict1_copy}") # {'a': 1, 'b': 3, 'c': 4}
# 方法2: ** 运算符 (Python 3.5+)
merged = {**dict1, **dict2, **dict3}
print(f"**合并: {merged}") # {'a': 1, 'b': 3, 'c': 4, 'd': 5, 'e': 6}
# 方法3: | 运算符 (Python 3.9+)
# merged = dict1 | dict2 | dict3
# 处理键冲突(后面的覆盖前面的)
base_config = {'host': 'localhost', 'port': 8080, 'debug': False}
user_config = {'port': 9000, 'debug': True}
final_config = {**base_config, **user_config}
print(f"配置合并: {final_config}") # {'host': 'localhost', 'port': 9000, 'debug': True}
嵌套字典
多层嵌套字典
# 嵌套字典创建
company = {
'name': 'Tech Corp',
'employees': {
'Alice': {
'age': 25,
'position': 'Developer',
'skills': ['Python', 'JavaScript']
},
'Bob': {
'age': 30,
'position': 'Manager',
'skills': ['Leadership', 'Project Management']
}
},
'departments': {
'engineering': {
'head': 'Alice',
'budget': 1000000
},
'sales': {
'head': 'Charlie',
'budget': 500000
}
}
}
print("=== 嵌套字典访问 ===")
# 访问嵌套值
print(f"公司: {company['name']}")
print(f"Alice的职位: {company['employees']['Alice']['position']}")
print(f"Alice的技能: {company['employees']['Alice']['skills']}")
# 安全访问嵌套值
alice_age = company.get('employees', {}).get('Alice', {}).get('age', '未知')
dave_age = company.get('employees', {}).get('Dave', {}).get('age', '未知')
print(f"Alice年龄: {alice_age}")
print(f"Dave年龄: {dave_age}")
# 修改嵌套值
company['employees']['Alice']['age'] = 26
company['employees']['Alice']['skills'].append('React')
print(f"修改后Alice: {company['employees']['Alice']}")
嵌套字典操作
def process_nested_dict(data):
    """Print a two-level view of ``data`` followed by summary statistics.

    Top-level values that are dictionaries are expanded one level; anything
    else is printed on a single line.  The summary reports the number of
    entries under ``'employees'`` and the sum of every ``'budget'`` found
    under ``'departments'`` (missing keys count as empty / zero).
    """
    print("=== 公司结构 ===")
    for section, content in data.items():
        if not isinstance(content, dict):
            print(f"{section}: {content}")
            continue
        print(f"{section}:")
        for field, detail in content.items():
            print(f" {field}: {detail}")

    # Summary statistics
    employee_total = len(data.get('employees', {}))
    department_budgets = [
        department.get('budget', 0)
        for department in data.get('departments', {}).values()
    ]
    budget_total = sum(department_budgets)
    print("\n统计信息:")
    print(f"员工总数: {employee_total}")
    print(f"总预算: {budget_total}")
# Print the structure and statistics of the `company` dictionary built earlier.
process_nested_dict(company)
字典排序
按键排序
scores = {'Bob': 85, 'Alice': 92, 'Charlie': 78, 'Diana': 95}
print("=== 字典排序 ===")

# Sort by key: iterate the names in order and rebuild the dict.
sorted_by_key = {name: scores[name] for name in sorted(scores)}
print(f"按键排序: {sorted_by_key}")  # {'Alice': 92, 'Bob': 85, 'Charlie': 78, 'Diana': 95}

# Sort by value, ascending: order the names by the score each one maps to.
sorted_by_value = {name: scores[name] for name in sorted(scores, key=scores.get)}
print(f"按值升序: {sorted_by_value}")  # {'Charlie': 78, 'Bob': 85, 'Alice': 92, 'Diana': 95}

# Sort by value, descending.
sorted_by_value_desc = {
    name: scores[name]
    for name in sorted(scores, key=scores.get, reverse=True)
}
print(f"按值降序: {sorted_by_value_desc}")  # {'Diana': 95, 'Alice': 92, 'Bob': 85, 'Charlie': 78}
复杂排序
students = {
    'Alice': {'math': 85, 'english': 92, 'science': 88},
    'Bob': {'math': 78, 'english': 85, 'science': 90},
    'Charlie': {'math': 92, 'english': 88, 'science': 95},
}

# Rank students by their math score, highest first.
sorted_by_math = dict(sorted(students.items(), key=lambda item: item[1]['math'], reverse=True))
print("按数学成绩排序:")
# The loop variable is named `marks` so it does not overwrite a module-level
# `scores` dictionary defined by an earlier snippet.
for name, marks in sorted_by_math.items():
    print(f" {name}: 数学 {marks['math']}分")


# Rank students by their average score.
def get_average(scores):
    """Return the arithmetic mean of the values in ``scores``.

    Args:
        scores: mapping of subject name to numeric grade.

    Returns:
        The mean grade, or ``0.0`` when ``scores`` is empty (instead of
        raising ZeroDivisionError).
    """
    if not scores:
        return 0.0
    return sum(scores.values()) / len(scores)


sorted_by_avg = dict(sorted(students.items(), key=lambda item: get_average(item[1]), reverse=True))
print("\n按平均分排序:")
for name, marks in sorted_by_avg.items():
    avg = get_average(marks)
    print(f" {name}: 平均分 {avg:.1f}")
字典方法详解
常用方法演示
def dictionary_methods_demo():
    """Walk through the most common ``dict`` methods, printing each result."""
    profile = {'name': 'Alice', 'age': 25, 'city': 'Beijing'}
    print("=== 字典方法 ===")

    # keys() / values() / items() return views; list() materialises them.
    print(f"所有键: {list(profile.keys())}")
    print(f"所有值: {list(profile.values())}")
    print(f"所有键值对: {list(profile.items())}")

    # get() never raises: it returns None or the supplied fallback.
    print(f"获取姓名: {profile.get('name')}")
    print(f"获取职业: {profile.get('job', '无')}")

    # setdefault() inserts the key only when it is absent and returns the stored value.
    country = profile.setdefault('country', 'China')
    print(f"设置国家后: {profile}, 返回值: {country}")

    # pop() removes a key and hands back its value.
    removed_age = profile.pop('age')
    print(f"删除年龄后: {profile}, 返回值: {removed_age}")

    # popitem() removes and returns the most recently inserted pair.
    last_item = profile.popitem()
    print(f"删除最后一项后: {profile}, 返回值: {last_item}")

    # update() merges another mapping in place.
    profile.update({'job': 'Engineer', 'salary': 50000})
    print(f"更新后: {profile}")

    # copy() makes a shallow copy: rebinding a key in the copy leaves the original alone.
    duplicate = profile.copy()
    duplicate['name'] = 'Bob'
    print(f"原字典: {profile}")
    print(f"拷贝字典: {duplicate}")

    # clear() empties the dictionary in place.
    duplicate.clear()
    print(f"清空后: {duplicate}")


dictionary_methods_demo()
实际应用案例
单词频率统计
def word_frequency(text):
    """Count how often each word occurs in ``text``, ignoring case.

    The text is lower-cased and split on whitespace.  Punctuation attached to
    either end of a token (``"language."``, ``"hello!"``) is stripped so the
    token counts as the bare word; punctuation inside a token
    (``"python's"``, ``"high-level"``) is kept.

    Args:
        text: the string to analyse.

    Returns:
        dict mapping each word to its number of occurrences, in first-seen order.
    """
    import string  # local import keeps this snippet self-contained

    frequency = {}
    for token in text.lower().split():
        word = token.strip(string.punctuation)
        if not word:  # the token was pure punctuation, e.g. "--"
            continue
        frequency[word] = frequency.get(word, 0) + 1
    return frequency
def display_frequency(frequency, top_n=5):
    """Print a ranked summary of a word-frequency mapping.

    Args:
        frequency: mapping of word to occurrence count.
        top_n: how many of the most frequent words to list.
    """
    # Highest count first; ties keep the mapping's own order (sorted() is stable).
    ranked = sorted(frequency.items(), key=lambda pair: pair[1], reverse=True)
    leaders = ranked[:top_n]

    print("=== 单词频率统计 ===")
    print(f"总不同单词数: {len(frequency)}")
    print(f"前{top_n}个最常出现的单词:")
    for position, entry in enumerate(leaders, start=1):
        word, count = entry
        print(f" {position}. '{word}': {count}次")
# Usage example: count the words of a short paragraph and show the five most frequent.
sample_text = """
Python is an interpreted, high-level, general-purpose programming language.
Python's design philosophy emphasizes code readability with its notable use
of significant whitespace. Python is a great language for beginners.
"""
freq = word_frequency(sample_text)
display_frequency(freq, 5)
学生成绩管理系统
class Gradebook:
    """In-memory grade book keyed by student name.

    ``self.students`` maps each name to a record of the form
    ``{'grades': {subject: grade}, 'average': float}``.
    """

    def __init__(self):
        self.students = {}

    def add_student(self, name):
        """Register ``name``; a no-op when the student already exists.

        Using ``setdefault`` means re-adding a student never wipes the grades
        that were already recorded for them.
        """
        self.students.setdefault(name, {'grades': {}, 'average': 0.0})

    def add_grade(self, name, subject, grade):
        """Record ``grade`` for ``subject``, creating the student if needed."""
        self.add_student(name)
        self.students[name]['grades'][subject] = grade
        self._update_average(name)

    def _update_average(self, name):
        """Recompute the stored average of ``name`` from their grades."""
        grades = self.students[name]['grades'].values()
        if grades:
            self.students[name]['average'] = sum(grades) / len(grades)

    def get_student_grades(self, name):
        """Return the ``{subject: grade}`` dict of ``name`` ({} if unknown)."""
        return self.students.get(name, {}).get('grades', {})

    def get_class_average(self):
        """Return the mean of the per-student averages.

        Students without any recorded grade are left out so their placeholder
        average of 0.0 does not drag the result down.  Returns 0.0 when no
        student has a grade.
        """
        averages = [
            record['average']
            for record in self.students.values()
            if record['grades']
        ]
        if not averages:
            return 0.0
        return sum(averages) / len(averages)

    def get_top_students(self, n=3):
        """Return the ``n`` best ``(name, record)`` pairs, highest average first."""
        sorted_students = sorted(
            self.students.items(),
            key=lambda item: item[1]['average'],
            reverse=True,
        )
        return sorted_students[:n]

    def display_summary(self):
        """Print the class totals followed by every student's grades."""
        print("=== 成绩管理系统 ===")
        print(f"学生总数: {len(self.students)}")
        print(f"班级平均分: {self.get_class_average():.2f}")
        print("\n学生成绩:")
        for name, data in self.students.items():
            print(f" {name}: 平均分 {data['average']:.2f}")
            for subject, grade in data['grades'].items():
                print(f" {subject}: {grade}")
# Usage example: record two grades for each of three students, then report.
gradebook = Gradebook()
for student, subject, grade in [
    ('Alice', 'Math', 85),
    ('Alice', 'English', 92),
    ('Bob', 'Math', 78),
    ('Bob', 'English', 85),
    ('Charlie', 'Math', 95),
    ('Charlie', 'English', 88),
]:
    gradebook.add_grade(student, subject, grade)

gradebook.display_summary()
print(f"\n前三名: {gradebook.get_top_students(3)}")
配置管理系统
class ConfigManager:
    """Nested configuration store addressed by dot-separated paths.

    ``self.config`` is a dictionary of sections; ``'database.host'`` refers
    to ``self.config['database']['host']``.
    """

    # Sentinel that distinguishes "path not found" from a stored value of None.
    _MISSING = object()

    def __init__(self):
        self.config = {
            'database': {
                'host': 'localhost',
                'port': 5432,
                'name': 'myapp',
                'user': 'admin',
            },
            'api': {
                'base_url': 'https://api.example.com',
                'timeout': 30,
                'retries': 3,
            },
            'logging': {
                'level': 'INFO',
                'file': 'app.log',
                'max_size': '10MB',
            },
        }

    def get(self, path, default=None):
        """Return the value stored at ``path``, or ``default`` if any segment is missing."""
        current = self.config
        for key in path.split('.'):
            if isinstance(current, dict) and key in current:
                current = current[key]
            else:
                return default
        return current

    def set(self, path, value):
        """Store ``value`` at ``path``, creating intermediate sections as needed.

        Raises:
            TypeError: if an intermediate segment already holds a non-dict
                value (e.g. ``set('database.port.x', 1)``); the existing
                value is left untouched.
        """
        *parents, leaf = path.split('.')
        current = self.config
        for key in parents:
            child = current.setdefault(key, {})
            if not isinstance(child, dict):
                raise TypeError(f"配置路径 '{path}' 中的 '{key}' 不是字典, 无法在其下设置值")
            current = child
        current[leaf] = value

    def show_config(self, section=None):
        """Print the whole configuration, or only the part under ``section``.

        ``section`` may name a nested section, a single value, or a path that
        does not exist; none of these raise.
        """
        if section:
            config_to_show = self.get(section, self._MISSING)
        else:
            config_to_show = self.config
        print("=== 配置信息 ===")
        if config_to_show is self._MISSING:
            print(f"未找到配置: {section}")
        elif isinstance(config_to_show, dict):
            self._print_dict(config_to_show, 0)
        else:
            # The path points at a single value rather than a section.
            print(f"{section}: {config_to_show}")

    def _print_dict(self, data, indent):
        """Recursively print ``data``, indenting one space per nesting level."""
        for key, value in data.items():
            if isinstance(value, dict):
                print(' ' * indent + f"{key}:")
                self._print_dict(value, indent + 1)
            else:
                print(' ' * indent + f"{key}: {value}")
# Usage example: read a few values, then override and add some.
config_mgr = ConfigManager()
print("获取配置值:")
print(f"数据库主机: {config_mgr.get('database.host')}")
print(f"API超时: {config_mgr.get('api.timeout')}")
print(f"不存在的配置: {config_mgr.get('nonexistent.key', '默认值')}")

print("\n设置配置值:")
for setting_path, setting_value in [
    ('database.port', 3306),
    ('api.retries', 5),
    ('new.feature.enabled', True),  # creates the missing 'new' and 'feature' sections
]:
    config_mgr.set(setting_path, setting_value)
config_mgr.show_config('database')
高级技巧和性能优化
默认字典 (defaultdict)
from collections import defaultdict
def defaultdict_demo():
    """Contrast manual dict bookkeeping with ``collections.defaultdict``."""
    words = ['apple', 'banana', 'apple', 'orange', 'banana', 'apple']

    # Plain dict: the first occurrence of a word has to be handled by hand.
    plain_counts = {}
    for word in words:
        if word in plain_counts:
            plain_counts[word] += 1
        else:
            plain_counts[word] = 1
    print(f"传统统计: {plain_counts}")

    # defaultdict(int): a missing key is created with int() == 0 on first access.
    auto_counts = defaultdict(int)
    for word in words:
        auto_counts[word] += 1
    print(f"defaultdict统计: {dict(auto_counts)}")

    # defaultdict(list): group (class, student) pairs by class.
    enrolments = [
        ('class1', 'Alice'),
        ('class2', 'Bob'),
        ('class1', 'Charlie'),
        ('class2', 'Diana'),
    ]
    roster = defaultdict(list)
    for class_name, student in enrolments:
        roster[class_name].append(student)
    print(f"分组结果: {dict(roster)}")


defaultdict_demo()
字典视图对象
def dict_views_demo():
    """Show that dictionary views are live and that key views act like sets."""
    person = {'name': 'Alice', 'age': 25, 'city': 'Beijing'}

    # The views are taken once, before the dictionary is modified.
    labelled_views = (
        ("键视图:", person.keys()),
        ("值视图:", person.values()),
        ("项视图:", person.items()),
    )
    print("原始字典:", person)
    for label, view in labelled_views:
        print(label, list(view))

    # Changing the dictionary is visible through the views already held.
    person['job'] = 'Engineer'
    person['age'] = 26
    print("\n修改后:")
    for label, view in labelled_views:
        print(label, list(view))  # includes the new key / value / item

    # Key views support set operators directly.
    dict1 = {'a': 1, 'b': 2, 'c': 3}
    dict2 = {'b': 2, 'c': 4, 'd': 5}
    left_keys, right_keys = dict1.keys(), dict2.keys()
    print("\n集合操作:")
    print(f"共同键: {left_keys & right_keys}")  # {'b', 'c'}
    print(f"所有键: {left_keys | right_keys}")  # {'a', 'b', 'c', 'd'}
    print(f"唯一键: {left_keys - right_keys}")  # {'a'}


dict_views_demo()
集合操作详解
集合创建和基本特性
集合创建
# An empty set must be written set(); a bare {} is an empty dict.
empty_set = set()
empty_set2 = set()

# Set literals with elements.
numbers = {1, 2, 3, 4, 5}
fruits = {'apple', 'banana', 'orange'}
mixed = {1, 'hello', 3.14, True}  # True == 1, so this set holds three elements

# The set() constructor accepts any iterable and drops duplicates.
from_list = set([1, 2, 3, 2, 1])  # {1, 2, 3}
from_string = set("hello")  # {'h', 'e', 'l', 'o'}
from_range = set(range(5))  # {0, 1, 2, 3, 4}

print("创建的集合:")
print(f"empty_set: {empty_set}")
print(f"numbers: {numbers}")
print(f"from_list: {from_list}")
print(f"from_string: {from_string}")
集合的特性
# Set elements must be hashable, which in practice means immutable values.
valid_set = {
    'string',  # str
    123,       # int
    3.14,      # float
    (1, 2),    # tuple
    True,      # bool
}

# These would raise TypeError:
# invalid = {[1,2]}     # a list is unhashable
# invalid = {{'a':1}}   # a dict is unhashable

print("集合特性演示:")
print(f"自动去重: {set([1,2,2,3,3,3,4])}")  # {1, 2, 3, 4}
print(f"无序性: {set('python')}")  # the order may differ from run to run
集合基本操作
添加和删除元素
fruits = {'apple', 'banana', 'orange'}
print("原始集合:", fruits)

# add() inserts a single element.
fruits.add('grape')
print(f"add后: {fruits}")

# update() inserts every element of an iterable (a list, then a set).
fruits.update(['mango', 'pear'])
fruits.update({'pineapple', 'watermelon'})
print(f"update后: {fruits}")

# remove() raises KeyError when the element is absent.
fruits.remove('banana')
print(f"remove后: {fruits}")

# discard() is the silent variant: a missing element is ignored.
fruits.discard('nonexistent')
fruits.discard('apple')
print(f"discard后: {fruits}")

# pop() removes and returns an arbitrary element.
removed = fruits.pop()
print(f"pop删除: {removed}, 剩余: {fruits}")

# clear() empties the set in place.
fruits.clear()
print(f"clear后: {fruits}")
集合查询
numbers = {1, 3, 5, 7, 9}
print("=== 集合查询 ===")
print(f"集合: {numbers}")

# Membership tests with `in` and `not in`.
print(f"3在集合中: {3 in numbers}")  # True
print(f"2在集合中: {2 in numbers}")  # False
print(f"2不在集合中: {2 not in numbers}")  # True

# Number of elements.
print(f"集合大小: {len(numbers)}")

# Subset and superset checks.
set_a = {1, 2, 3}
set_b = {1, 2, 3, 4, 5}
print(f"set_a: {set_a}")
print(f"set_b: {set_b}")
print(f"set_a是set_b的子集: {set_a.issubset(set_b)}")  # True
print(f"set_b是set_a的超集: {set_b.issuperset(set_a)}")  # True
print(f"set_a < set_b: {set_a < set_b}")  # True (proper subset)
集合运算
基本集合运算
set_a = {1, 2, 3, 4, 5}
set_b = {4, 5, 6, 7, 8}
print("=== 集合运算 ===")
print(f"set_a: {set_a}")
print(f"set_b: {set_b}")

# Union: elements found in either set.
union = set_a | set_b
union_method = set_a.union(set_b)
print(f"并集 (|): {union}")
print(f"并集 (union()): {union_method}")

# Intersection: elements found in both sets.
intersection = set_a & set_b
intersection_method = set_a.intersection(set_b)
print(f"交集 (&): {intersection}")
print(f"交集 (intersection()): {intersection_method}")

# Difference: elements of set_a that are not in set_b.
difference = set_a - set_b
difference_method = set_a.difference(set_b)
print(f"差集 (-): {difference}")
print(f"差集 (difference()): {difference_method}")

# Symmetric difference: elements found in exactly one of the two sets.
symmetric_diff = set_a ^ set_b
symmetric_diff_method = set_a.symmetric_difference(set_b)
print(f"对称差集 (^): {symmetric_diff}")
print(f"对称差集 (symmetric_difference()): {symmetric_diff_method}")
集合运算方法
def set_operations_demo():
    """Print multi-set operations, the in-place update methods and ``isdisjoint``."""
    A = {1, 2, 3, 4, 5}
    B = {4, 5, 6, 7}
    C = {7, 8, 9}
    print("=== 集合运算方法 ===")
    print(f"A: {A}, B: {B}, C: {C}")

    # The operators chain across any number of sets.
    print(f"三个集合的并集: {A | B | C}")
    print(f"三个集合的交集: {A & B & C}")

    # In-place variants: each starts from a fresh copy so A itself is never changed.
    scratch = A.copy()
    scratch.update(B)  # same as: scratch |= B
    print(f"update后: {scratch}")

    scratch = A.copy()
    scratch.intersection_update(B)  # same as: scratch &= B
    print(f"intersection_update后: {scratch}")

    scratch = A.copy()
    scratch.difference_update(B)  # same as: scratch -= B
    print(f"difference_update后: {scratch}")

    scratch = A.copy()
    scratch.symmetric_difference_update(B)  # same as: scratch ^= B
    print(f"symmetric_difference_update后: {scratch}")

    # isdisjoint() is True when the two sets share no element.
    print(f"A和C是否不相交: {A.isdisjoint(C)}")  # True
    print(f"B和C是否不相交: {B.isdisjoint(C)}")  # False


set_operations_demo()
集合推导式
基本集合推导式
# Set comprehension: duplicates in the input collapse automatically.
numbers = [1, 2, 2, 3, 3, 3, 4, 4, 4, 4]
unique_squares = {value ** 2 for value in numbers}
print(f"唯一平方数: {unique_squares}")  # {16, 1, 9, 4}

# Set comprehension with a condition.
even_squares = {value ** 2 for value in range(10) if value % 2 == 0}
print(f"偶数平方: {even_squares}")  # {0, 4, 16, 36, 64}

# Nested loops: collect every non-vowel letter used by any word.
words = ['hello', 'world', 'python', 'programming']
unique_chars = {
    letter
    for word in words
    for letter in word
    if letter not in 'aeiou'
}
print(f"非元音字母: {unique_chars}")
复杂集合推导式
def advanced_set_comprehensions():
    """Print three set comprehensions: from a dict, from nested lists, and a union."""
    # Build a set of names from a dictionary, keeping scores above 80.
    student_scores = {'Alice': 85, 'Bob': 92, 'Charlie': 78, 'Diana': 92}
    high_scorers = {
        name for name, score in student_scores.items() if score > 80
    }
    print(f"高分学生: {high_scorers}")

    # Flatten a list of rows into one set.
    matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    all_numbers = {cell for row in matrix for cell in row}
    print(f"所有数字: {all_numbers}")

    # Combine two filtered sets with the union operator.
    numbers = range(20)
    multiples_of_two = {value for value in numbers if value % 2 == 0}
    multiples_of_three = {value for value in numbers if value % 3 == 0}
    result = multiples_of_two | multiples_of_three
    print(f"2或3的倍数: {result}")


advanced_set_comprehensions()
不可变集合 (frozenset)
frozenset 使用
# Creating frozensets from any iterable.
frozen = frozenset([1, 2, 3, 4, 5])
frozen2 = frozenset(range(3))
print("=== frozenset ===")
print(f"frozen: {frozen}")
print(f"类型: {type(frozen)}")

# A frozenset has no mutating methods, so add() does not exist on it.
try:
    frozen.add(6)
except AttributeError as error:
    print(f"错误: {error}")

# Set algebra still works and produces new frozensets.
result = frozen | frozen2
print(f"并集: {result}")
print(f"类型: {type(result)}")  # still a frozenset

# Being hashable, a frozenset can serve as a dictionary key.
set_dict = {
    frozenset([1, 2, 3]): "set_123",
    frozenset(['a', 'b']): "set_ab",
}
print(f"集合字典: {set_dict}")
集合实际应用
数据去重
def data_deduplication():
    """Print three de-duplication examples: a list, a string, and a list of dicts."""
    # List: a round trip through set() drops repeats (order is not guaranteed).
    duplicate_list = [1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5]
    unique_list = list(set(duplicate_list))
    print(f"原始列表: {duplicate_list}")
    print(f"去重后: {unique_list}")

    # String: unique characters, sorted so the result is stable.
    text = "programming"
    unique_chars = ''.join(sorted(set(text)))
    print(f"原始字符串: {text}")
    print(f"唯一字符: {unique_chars}")

    # Dicts are unhashable, so each one is fingerprinted as a frozenset of its items.
    students = [
        {'name': 'Alice', 'age': 25},
        {'name': 'Bob', 'age': 30},
        {'name': 'Alice', 'age': 25},  # duplicate
        {'name': 'Charlie', 'age': 35},
    ]
    unique_students = []
    fingerprints = set()
    for student in students:
        fingerprint = frozenset(student.items())
        if fingerprint in fingerprints:
            continue
        fingerprints.add(fingerprint)
        unique_students.append(student)
    print(f"去重前学生数: {len(students)}")
    print(f"去重后学生数: {len(unique_students)}")


data_deduplication()
关系分析
def relationship_analysis():
    """Answer course-enrolment questions with set operators and print the results."""
    # Who takes which course.
    math_students = {'Alice', 'Bob', 'Charlie', 'Diana'}
    physics_students = {'Bob', 'Charlie', 'Eve', 'Frank'}
    chemistry_students = {'Alice', 'Charlie', 'Eve', 'Grace'}
    print("=== 学生选课分析 ===")
    print(f"数学课学生: {math_students}")
    print(f"物理课学生: {physics_students}")
    print(f"化学课学生: {chemistry_students}")

    # Intersection: enrolled in both math and physics.
    math_physics = math_students & physics_students
    print(f"同时选数学和物理: {math_physics}")

    # Difference: math students who take neither of the other two courses.
    only_math = math_students - physics_students - chemistry_students
    print(f"只选数学: {only_math}")

    # Intersection of all three courses.
    all_three = math_students & physics_students & chemistry_students
    print(f"选所有三门课: {all_three}")

    # Union: everybody enrolled in at least one course.
    at_least_one = math_students | physics_students | chemistry_students
    print(f"至少选一门课: {at_least_one}")
    print(f"总学生数: {len(at_least_one)}")


relationship_analysis()
权限管理系统
class PermissionSystem:
    """Role-based permission checks built on sets.

    ``self.roles`` maps a role name to its set of permission strings and
    ``self.user_roles`` maps each user to exactly one role name.
    """

    def __init__(self):
        self.roles = {
            'admin': {'read', 'write', 'delete', 'manage_users'},
            'editor': {'read', 'write'},
            'viewer': {'read'},
            'moderator': {'read', 'write', 'delete'},
        }
        self.user_roles = {}

    def assign_role(self, user, role):
        """Give ``user`` the role ``role``, replacing any previous role.

        Raises:
            ValueError: if ``role`` is not defined in ``self.roles``.
        """
        if role not in self.roles:
            raise ValueError(f"角色 '{role}' 不存在")
        self.user_roles[user] = role

    def get_user_permissions(self, user):
        """Return the permissions of ``user`` as a new set (empty if no role).

        A copy is returned so that a caller mutating the result cannot change
        the role definition shared by every user with that role.
        """
        role = self.user_roles.get(user)
        if role:
            return set(self.roles[role])
        return set()

    def check_permission(self, user, permission):
        """Return True when ``user`` holds ``permission``."""
        return permission in self.get_user_permissions(user)

    def can_access_feature(self, user, required_permissions):
        """Return True when ``user`` holds every permission in ``required_permissions``.

        ``required_permissions`` may be any iterable of permission strings
        (set, list, tuple, ...).
        """
        return set(required_permissions) <= self.get_user_permissions(user)

    def display_permissions(self):
        """Print every user with their role and sorted permissions."""
        print("=== 权限系统 ===")
        for user, role in self.user_roles.items():
            permissions = self.get_user_permissions(user)
            print(f"用户 {user} (角色: {role}): {sorted(permissions)}")
# Usage example: assign one role per user and list the resulting permissions.
permission_sys = PermissionSystem()
for account, role_name in (('alice', 'admin'), ('bob', 'editor'), ('charlie', 'viewer')):
    permission_sys.assign_role(account, role_name)
permission_sys.display_permissions()

print("\n权限检查:")
print(f"Alice可以删除: {permission_sys.check_permission('alice', 'delete')}")
print(f"Bob可以删除: {permission_sys.check_permission('bob', 'delete')}")
print(f"Charlie可以写: {permission_sys.check_permission('charlie', 'write')}")

# A feature that needs several permissions at once.
advanced_feature = {'read', 'write', 'delete'}
print(f"Alice可以访问高级功能: {permission_sys.can_access_feature('alice', advanced_feature)}")
print(f"Bob可以访问高级功能: {permission_sys.can_access_feature('bob', advanced_feature)}")
集合性能分析
成员检查性能
import time
def membership_performance():
    """Time ``in`` on a list versus a set and print the comparison.

    Two lookups are measured on 100000 integers: the last element (the worst
    case for a list that does contain the value) and a value that is absent
    (the list must be scanned to the end).  ``time.perf_counter()`` is used
    because it is the high-resolution clock intended for short intervals.
    """
    size = 100000
    test_list = list(range(size))
    test_set = set(range(size))

    def lookup_time(value, container):
        """Return the seconds taken by one ``value in container`` test."""
        start = time.perf_counter()
        value in container
        return time.perf_counter() - start

    def speedup(slow, fast):
        """Return ``slow / fast``, or infinity when ``fast`` measured as zero."""
        # A set lookup can finish below the clock's resolution; guard the division.
        return slow / fast if fast > 0 else float('inf')

    print("=== 成员检查性能比较 ===")
    # Present element, stored last in the list.
    time1 = lookup_time(size - 1, test_list)
    time2 = lookup_time(size - 1, test_set)
    print(f"列表检查时间: {time1:.6f}秒")
    print(f"集合检查时间: {time2:.6f}秒")
    print(f"性能提升: {speedup(time1, time2):.1f}倍")

    # Absent element.
    time3 = lookup_time(size, test_list)
    time4 = lookup_time(size, test_set)
    print("\n检查不存在元素:")
    print(f"列表检查时间: {time3:.6f}秒")
    print(f"集合检查时间: {time4:.6f}秒")
    print(f"性能提升: {speedup(time3, time4):.1f}倍")


membership_performance()
集合方法总结
所有集合方法
def all_set_methods():
    """Exercise every commonly used ``set`` method and print the results."""
    A = {1, 2, 3, 4, 5}
    B = {4, 5, 6, 7}
    print("=== 集合方法总结 ===")
    print(f"A: {A}, B: {B}")

    # Adding and removing elements.
    A.add(8)       # insert one element
    A.remove(8)    # delete; raises KeyError if absent
    A.discard(99)  # delete; silent if absent
    A.pop()        # remove and return an arbitrary element
    A.clear()      # empty the set

    # Start again from the original contents.
    A = {1, 2, 3, 4, 5}

    # Set algebra (method form, operator form in the trailing comment).
    print(f"并集: {A.union(B)}")  # A | B
    print(f"交集: {A.intersection(B)}")  # A & B
    print(f"差集: {A.difference(B)}")  # A - B
    print(f"对称差集: {A.symmetric_difference(B)}")  # A ^ B

    # In-place updates, each applied to a fresh copy of A.
    working = A.copy()
    working.update(B)  # |=
    working = A.copy()
    working.intersection_update(B)  # &=
    working = A.copy()
    working.difference_update(B)  # -=
    working = A.copy()
    working.symmetric_difference_update(B)  # ^=

    # Relationship checks.
    print(f"A是B的子集: {A.issubset(B)}")  # A <= B
    print(f"A是B的真子集: {A < B}")  # A < B
    print(f"A是B的超集: {A.issuperset(B)}")  # A >= B
    print(f"A是B的真超集: {A > B}")  # A > B
    print(f"A和B不相交: {A.isdisjoint(B)}")  # True only with no common element

    # Miscellaneous.
    print(f"集合大小: {len(A)}")
    print(f"浅拷贝: {A.copy()}")


all_set_methods()
实际项目案例
推荐系统相似度计算
def jaccard_similarity(set1, set2):
    """Return the Jaccard similarity ``|set1 & set2| / |set1 | set2|``.

    Two empty sets have no union to divide by; the result is then ``0``.
    """
    combined = set1 | set2
    if not combined:
        return 0
    shared = set1 & set2
    return len(shared) / len(combined)
def recommend_similar_users(target_user, all_users, user_interests):
    """Return up to three users whose interests are closest to ``target_user``'s.

    Args:
        target_user: key of the reference user in ``user_interests``.
        all_users: accepted for interface compatibility; the candidates are
            taken from ``user_interests`` and this argument is not consulted.
        user_interests: mapping of user name to a set of interests.

    Returns:
        A list of ``(user, similarity)`` pairs, most similar first.
    """
    target_interests = user_interests[target_user]
    similarities = {
        user: jaccard_similarity(target_interests, interests)
        for user, interests in user_interests.items()
        if user != target_user
    }
    # Highest similarity first; keep only the best three.
    ranked = sorted(similarities.items(), key=lambda pair: pair[1], reverse=True)
    return ranked[:3]
# Usage example: find the users most similar to Alice.
user_interests = {
    'Alice': {'python', 'machine learning', 'data science', 'coding'},
    'Bob': {'python', 'web development', 'javascript', 'coding'},
    'Charlie': {'machine learning', 'deep learning', 'python', 'ai'},
    'Diana': {'web development', 'javascript', 'html', 'css'},
    'Eve': {'data science', 'statistics', 'python', 'machine learning'},
}
print("=== 用户推荐系统 ===")
recommendations = recommend_similar_users('Alice', user_interests.keys(), user_interests)
print("给Alice推荐的相似用户:")
for match in recommendations:
    user, similarity = match
    print(f" {user}: 相似度 {similarity:.2f}")
数据清洗工具
class DataCleaner:
    """Small data-cleaning helpers that rely on set membership and set algebra."""

    def __init__(self):
        # Words dropped by remove_stop_words().
        self.stop_words = {'the', 'a', 'an', 'in', 'on', 'at', 'to', 'for', 'of', 'and', 'or', 'but'}
        # Categories accepted by validate_categories().
        self.valid_categories = {'electronics', 'clothing', 'books', 'home', 'sports'}

    def remove_stop_words(self, text):
        """Return ``text`` lower-cased with every stop word removed."""
        kept = (
            word
            for word in text.lower().split()
            if word not in self.stop_words
        )
        return ' '.join(kept)

    def find_duplicate_emails(self, email_list):
        """Return the set of addresses that occur more than once in ``email_list``."""
        seen, duplicates = set(), set()
        for email in email_list:
            if email not in seen:
                seen.add(email)
                continue
            duplicates.add(email)
        return duplicates

    def validate_categories(self, categories):
        """Split ``categories`` into ``(valid, invalid)`` sets."""
        requested = set(categories)
        accepted = requested & self.valid_categories
        rejected = requested - self.valid_categories
        return accepted, rejected

    def find_common_tags(self, items):
        """Return the tags shared by every tag collection in ``items``.

        An empty ``items`` yields an empty set.
        """
        if not items:
            return set()
        first, rest = items[0], items[1:]
        return set(first).intersection(*rest)
# Usage example: run each DataCleaner helper once.
cleaner = DataCleaner()
# Text cleaning: drop stop words (the result is lower-cased).
text = "The quick brown fox jumps over the lazy dog"
cleaned_text = cleaner.remove_stop_words(text)
print(f"原始文本: {text}")
print(f"清洗后: {cleaned_text}")
# Duplicate detection
emails = ['alice@email.com', 'bob@email.com', 'alice@email.com', 'charlie@email.com']
duplicates = cleaner.find_duplicate_emails(emails)
print(f"\n重复邮箱: {duplicates}")
# Category validation: split the input into known and unknown categories.
user_categories = ['electronics', 'clothing', 'invalid', 'books']
valid, invalid = cleaner.validate_categories(user_categories)
print(f"有效分类: {valid}")
print(f"无效分类: {invalid}")
# Common tags: the tags present on every product.
products = [
['python', 'programming', 'book'],
['python', 'web', 'programming'],
['python', 'data', 'programming']
]
common = cleaner.find_common_tags(products)
print(f"共同标签: {common}")
最佳实践和技巧
集合使用技巧
def set_tips_and_tricks():
    """Print five short recipes that rely on sets."""
    # 1. Quick de-duplication of a list.
    numbers = [1, 2, 2, 3, 3, 3, 4, 4, 4, 4]
    unique = list(set(numbers))
    print(f"去重技巧: {unique}")

    # 2. Elements that appear in only one of two lists.
    list1 = [1, 2, 3, 4, 5]
    list2 = [4, 5, 6, 7, 8]
    first, second = set(list1), set(list2)
    only_in_list1 = first - second
    only_in_list2 = second - first
    print(f"只在list1: {only_in_list1}")
    print(f"只在list2: {only_in_list2}")

    # 3. A list has repeats exactly when its set is shorter than the list.
    def has_duplicates(lst):
        return len(set(lst)) != len(lst)

    print(f"列表有重复: {has_duplicates([1,2,3,2])}")
    print(f"列表无重复: {has_duplicates([1,2,3,4])}")

    # 4. Set comprehension: the first letter of each word.
    words = ['hello', 'world', 'python', 'programming']
    first_letters = {word[0] for word in words}
    print(f"首字母集合: {first_letters}")

    # 5. frozenset keys: an undirected edge {A, B} is the same key as {B, A}.
    graph = {
        frozenset(['A', 'B']): 5,
        frozenset(['B', 'C']): 3,
        frozenset(['A', 'C']): 7,
    }
    print(f"图边权重: {graph}")


set_tips_and_tricks()
可变与不可变对象
基本概念
可变对象 (Mutable)
- 创建后可以修改其内容
- 修改时不创建新对象,内存地址不变
- 例子:列表、字典、集合
不可变对象 (Immutable)
- 创建后不能修改其内容
- 任何修改操作都会创建新对象,内存地址改变
- 例子:数字、字符串、元组、布尔值
直观演示
不可变对象示例
# int is immutable.
x = 10
print(f"修改前: x = {x}, id = {id(x)}")
x = x + 1  # looks like a modification, but x is rebound to a new object
print(f"修改后: x = {x}, id = {id(x)}")  # the id is different

# str is immutable.
s = "hello"
print(f"修改前: s = {s}, id = {id(s)}")
s = s + " world"  # builds a new string object
print(f"修改后: s = {s}, id = {id(s)}")  # the id is different

# tuple is immutable.
t = (1, 2, 3)
print(f"元组: {t}, id = {id(t)}")
# t[0] = 10  # TypeError: tuple elements cannot be reassigned
可变对象示例
# list is mutable.
lst = [1, 2, 3]
print(f"修改前: lst = {lst}, id = {id(lst)}")
lst.append(4)  # changes the existing list
lst[0] = 10    # replaces an element in place
print(f"修改后: lst = {lst}, id = {id(lst)}")  # same id as before

# dict is mutable.
d = {'a': 1, 'b': 2}
print(f"修改前: d = {d}, id = {id(d)}")
d['c'] = 3    # add a new key/value pair
d['a'] = 100  # overwrite an existing value
print(f"修改后: d = {d}, id = {id(d)}")  # same id as before

# set is mutable.
s = {1, 2, 3}
print(f"修改前: s = {s}, id = {id(s)}")
s.add(4)  # add an element
print(f"修改后: s = {s}, id = {id(s)}")  # same id as before
重要区别
赋值操作的区别
# Immutable object
a = 100
b = a  # b and a refer to the same object
print(f"a id: {id(a)}, b id: {id(b)}")  # identical ids
a = 200  # a is rebound to a new object; b is unaffected
print(f"a = {a}, b = {b}")  # a=200, b=100

# Mutable object
list1 = [1, 2, 3]
list2 = list1  # list2 and list1 refer to the same list object
print(f"list1 id: {id(list1)}, list2 id: {id(list2)}")  # identical ids
list1.append(4)  # mutates the shared object, so list2 sees the change too
print(f"list1 = {list1}, list2 = {list2}")  # both are [1, 2, 3, 4]
函数参数传递的影响
def modify_immutable(x):
    """Rebind the local name ``x`` to ``x + 10`` and print it.

    The caller's object is never touched: ``x + 10`` produces a new object
    that only the local name refers to.
    """
    x = x + 10
    print(f"函数内: x = {x}, id = {id(x)}")


def modify_mutable(lst):
    """Append 100 to ``lst`` in place and print it; the caller sees the change."""
    lst.append(100)
    print(f"函数内: lst = {lst}, id = {id(lst)}")


# Immutable argument: num is unchanged after the call.
num = 5
print(f"调用前: num = {num}, id = {id(num)}")
modify_immutable(num)
print(f"调用后: num = {num}, id = {id(num)}")
print()

# Mutable argument: my_list is modified by the call.
my_list = [1, 2, 3]
print(f"调用前: my_list = {my_list}, id = {id(my_list)}")
modify_mutable(my_list)
print(f"调用后: my_list = {my_list}, id = {id(my_list)}")
完整分类
不可变对象 (Immutable)
# Numeric types
int_num = 42
float_num = 3.14
complex_num = 2 + 3j

# String
text = "hello"

# Tuple
tuple_data = (1, 2, 3)

# Boolean
bool_val = True

# Frozen set
frozen_set = frozenset([1, 2, 3])

# Bytes
bytes_data = b"hello"
可变对象 (Mutable)
# List
list_data = [1, 2, 3]

# Dictionary
dict_data = {'a': 1, 'b': 2}

# Set
set_data = {1, 2, 3}

# Byte array
bytearray_data = bytearray(b"hello")


# Instances of user-defined classes are mutable by default.
class Person:
    """Minimal class with one reassignable attribute, ``name``."""

    def __init__(self, name):
        self.name = name


person = Person("Alice")
总结表格
| 特性 | 可变对象 | 不可变对象 |
|---|---|---|
| 修改内容 | ✅ 可以 | ❌ 不可以 |
| 内存地址 | 修改时不变 | 修改时改变 |
| 赋值操作 | 共享引用(不拷贝), 原地修改对所有引用可见 | 同样共享引用(不拷贝), "修改"实际是把变量重新绑定到新对象 |
| 字典键 | ❌ 不能作为键 | ✅ 可以作为键 |
| 线程安全 | ❌ 不安全 | ✅ 安全 |
| 例子 | 列表、字典、集合 | 数字、字符串、元组 |
核心要点
- 可变对象:直接修改内容,内存地址不变
- 不可变对象:修改时创建新对象,内存地址改变
- 赋值操作:对于可变对象要小心,多个变量可能指向同一个对象
- 函数参数:传递可变对象时,函数内修改会影响原对象
函数
函数定义和调用
基本函数定义
# The simplest possible function: no parameters, no return value.
def greet():
    """Print a fixed greeting."""
    print("Hello, World!")


# A function with one parameter.
def greet_name(name):
    """Print a greeting addressed to ``name``."""
    print(f"Hello, {name}!")


# A function that returns a value.
def add(a, b):
    """Return ``a + b``."""
    total = a + b
    return total


# Calling the functions
greet()  # Hello, World!
greet_name("Alice")  # Hello, Alice!
result = add(3, 5)  # result = 8
print(f"3 + 5 = {result}")
文档字符串 (Docstring)
def calculate_circle_area(radius):
    """
    Compute the area of a circle.

    Args:
        radius (float): radius of the circle

    Returns:
        float: area of the circle, ``pi * radius ** 2``
    """
    from math import pi

    area = pi * radius ** 2
    return area
# Inspect the docstring: __doc__ holds the raw text, help() renders it with the signature.
print(calculate_circle_area.__doc__)
help(calculate_circle_area)
函数参数
位置参数
def describe_pet(animal_type, pet_name):
    """Print two sentences describing a pet of kind ``animal_type`` named ``pet_name``."""
    for sentence in (
        f"I have a {animal_type}.",
        f"My {animal_type}'s name is {pet_name}.",
    ):
        print(sentence)


# Positional arguments are matched to parameters by order.
describe_pet('dog', 'Willie')
describe_pet('cat', 'Mimi')
关键字参数
def describe_pet(animal_type, pet_name):
    """Print two sentences describing a pet of kind ``animal_type`` named ``pet_name``."""
    intro = f"I have a {animal_type}."
    detail = f"My {animal_type}'s name is {pet_name}."
    print(intro)
    print(detail)


# With keyword arguments the order no longer matters.
describe_pet(animal_type='hamster', pet_name='Harry')
describe_pet(pet_name='Harry', animal_type='hamster')

# Mixed: positional arguments first, keyword arguments after.
describe_pet('dog', pet_name='Willie')
默认参数
def describe_pet(pet_name, animal_type='dog'):
    """Print two sentences describing a pet; ``animal_type`` defaults to ``'dog'``."""
    lines = [
        f"I have a {animal_type}.",
        f"My {animal_type}'s name is {pet_name}.",
    ]
    for line in lines:
        print(line)


# Rely on the default value.
describe_pet('Willie')  # animal_type falls back to 'dog'
describe_pet('Harry', 'hamster')  # animal_type supplied positionally
describe_pet(animal_type='cat', pet_name='Mimi')  # keyword arguments

# Note: parameters with defaults must come after those without.
可变参数 (*args 和 **kwargs)
def make_pizza(*toppings):
    """Print a header followed by one line per requested topping.

    ``*toppings`` collects any number of positional arguments into a tuple.
    """
    print("Making a pizza with the following toppings:")
    for item in toppings:
        print(f"- {item}")


make_pizza('pepperoni')
make_pizza('mushrooms', 'green peppers', 'extra cheese')
def build_profile(first, last, **user_info):
    """Return a dict holding the user's names plus every extra keyword argument.

    ``**user_info`` collects arbitrary keyword arguments into a dict; its
    entries are merged after the two name fields.
    """
    return {'first_name': first, 'last_name': last, **user_info}


user_profile = build_profile(
    'albert',
    'einstein',
    location='princeton',
    field='physics',
)
print(user_profile)
返回值
返回单个值
def get_formatted_name(first_name, last_name):
    """Return ``"first_name last_name"`` in title case."""
    return f"{first_name} {last_name}".title()


musician = get_formatted_name('jimi', 'hendrix')
print(musician)  # Jimi Hendrix
返回多个值
def operate_numbers(a, b):
    """Return the sum, difference, product and quotient of ``a`` and ``b``.

    Returns:
        A 4-tuple ``(a + b, a - b, a * b, a / b)``; the quotient is ``None``
        when ``b`` is zero.
    """
    sum_result = a + b
    difference = a - b
    product = a * b
    quotient = a / b if b != 0 else None
    return sum_result, difference, product, quotient


# Unpack the returned tuple into separate names.  The names are chosen so
# they do not rebind the `add` function defined earlier in this file.
total, diff, prod, quot = operate_numbers(10, 5)
print(f"加: {total}, 减: {diff}, 乘: {prod}, 除: {quot}")

# The same result can also be received as a single tuple.
results = operate_numbers(10, 5)
print(f"所有结果: {results}")
返回复杂数据结构
def analyze_text(text):
    """Return basic statistics about ``text``.

    Words are the whitespace-separated tokens of ``text``.

    Returns:
        dict with the keys
        ``word_count`` (number of words),
        ``character_count`` (``len(text)``, whitespace included),
        ``unique_words`` (number of distinct words, case-sensitive) and
        ``average_word_length`` (mean length of the words themselves, ``0``
        when there are no words).
    """
    words = text.split()
    word_count = len(words)
    # Average over the words only; len(text) would also count the separators.
    letters_in_words = sum(len(word) for word in words)
    return {
        'word_count': word_count,
        'character_count': len(text),
        'unique_words': len(set(words)),
        'average_word_length': letters_in_words / word_count if word_count > 0 else 0,
    }


analysis = analyze_text("Python is a great programming language")
print(analysis)
变量作用域
局部变量和全局变量
# Module-level (global) variable
global_var = "I'm global"


def test_scope():
    """Read a global and a local variable, then rebind the global."""
    # The `global` declaration must come before the first use of the name in
    # this function; declaring it after a use is a SyntaxError.
    global global_var

    # Local variable: exists only while the function runs.
    local_var = "I'm local"
    print(f"函数内访问全局变量: {global_var}")
    print(f"函数内访问局部变量: {local_var}")

    # Because of the declaration above, this assignment rebinds the module-level name.
    global_var = "I'm modified"


test_scope()
print(f"函数外访问全局变量: {global_var}")
# print(f"函数外访问局部变量: {local_var}")  # NameError: local_var is not defined here
嵌套函数和作用域
def outer_function(x):
    """Return a closure that remembers ``x`` and this function's local variable."""
    outer_var = "outer"

    def inner_function(y):
        """Combine the enclosing scope's values with ``y`` into one string."""
        inner_var = "inner"
        # Names from the enclosing function remain readable in here.
        total = x + y
        return f"{outer_var} - {inner_var} - {total}"

    return inner_function


# Using the closure
closure_func = outer_function(10)
result = closure_func(5)
print(result)  # outer - inner - 15
高阶函数
函数作为参数
def apply_operation(numbers, operation):
    """Return a list holding ``operation(x)`` for every ``x`` in ``numbers``."""
    return list(map(operation, numbers))


def square(x):
    """Return ``x`` squared."""
    return x ** 2


def double(x):
    """Return twice ``x``."""
    return x * 2


numbers = [1, 2, 3, 4, 5]
squared = apply_operation(numbers, square)
doubled = apply_operation(numbers, double)
print(f"平方: {squared}")  # [1, 4, 9, 16, 25]
print(f"加倍: {doubled}")  # [2, 4, 6, 8, 10]

# A lambda works wherever a function object is expected.
cubed = apply_operation(numbers, lambda value: value ** 3)
print(f"立方: {cubed}")  # [1, 8, 27, 64, 125]
函数作为返回值
def create_multiplier(factor):
    """Return a function that multiplies its argument by ``factor``."""

    def multiplier(x):
        # `factor` is captured from the enclosing call.
        return x * factor

    return multiplier


# Build two specialised multipliers.
double = create_multiplier(2)
triple = create_multiplier(3)
print(f"双倍: {double(5)}")  # 10
print(f"三倍: {triple(5)}")  # 15

# Apply one of them to every element of a list.
numbers = [1, 2, 3, 4, 5]
doubled_numbers = list(map(double, numbers))
print(f"加倍列表: {doubled_numbers}")  # [2, 4, 6, 8, 10]
装饰器
基本装饰器
def my_decorator(func):
    """Wrap ``func`` so a message is printed before and after each call.

    Positional and keyword arguments are forwarded unchanged, the wrapped
    function's return value is handed back to the caller, and
    ``functools.wraps`` keeps the original name and docstring.
    """
    import functools  # local import: this snippet precedes the file's functools import

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        print("函数执行前...")
        result = func(*args, **kwargs)
        print("函数执行后...")
        return result

    return wrapper


@my_decorator
def say_hello():
    print("Hello!")


say_hello()
# Output:
# 函数执行前...
# Hello!
# 函数执行后...
带参数的装饰器
实用的装饰器
def repeat(num_times):
    """Decorator factory: call the decorated function ``num_times`` times.

    Args:
        num_times: how many times each call to the decorated function runs it.

    Returns:
        A decorator.  The decorated function returns the result of its last
        run, or ``None`` when ``num_times`` is zero or negative.
    """
    import functools  # local import: this snippet precedes the file's functools import

    def decorator_repeat(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            result = None  # stays None when the loop body never runs
            for _ in range(num_times):
                result = func(*args, **kwargs)
            return result

        return wrapper

    return decorator_repeat


@repeat(num_times=3)
def greet(name):
    print(f"Hello {name}")


greet("Alice")
# Output:
# Hello Alice
# Hello Alice
# Hello Alice
import time
from functools import wraps
def timer(func):
    """Decorator that prints how long each call to ``func`` takes.

    The interval is measured with ``time.perf_counter()``, the
    high-resolution clock meant for timing; ``time.time()`` follows the wall
    clock and can jump when the system time is adjusted.  The wrapped
    function's return value is passed through unchanged.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start_time
        print(f"{func.__name__} 执行时间: {elapsed:.4f}秒")
        return result

    return wrapper
def debug(func):
    """Decorator that prints every call to ``func`` and the value it returns.

    The call is shown as ``name(arg, ..., key=value, ...)`` using ``repr`` of
    each argument.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        shown = [repr(arg) for arg in args]
        shown.extend(f"{key}={value!r}" for key, value in kwargs.items())
        print(f"调用 {func.__name__}({', '.join(shown)})")
        outcome = func(*args, **kwargs)
        print(f"{func.__name__} 返回: {outcome!r}")
        return outcome

    return wrapper
# Decorators are applied bottom-up: debug wraps the function first, timer wraps that.
@timer
@debug
def slow_function(n):
    """Simulate a slow function: sleep for one second, then return ``n * 2``."""
    time.sleep(1)
    return n * 2


result = slow_function(5)
Lambda 表达式
基本用法
# A one-argument lambda bound to a name.
square = lambda value: value ** 2
print(f"5的平方: {square(5)}")  # 25

# A lambda with several parameters.
add = lambda left, right: left + right
print(f"3 + 4 = {add(3, 4)}")  # 7

# A lambda that is called right where it is defined.
result = (lambda value: value * 2)(10)
print(f"立即执行: {result}")  # 20
在高级函数中的应用
# With map(): transform every element.
numbers = [1, 2, 3, 4, 5]
squared = list(map(lambda value: value ** 2, numbers))
print(f"映射平方: {squared}")  # [1, 4, 9, 16, 25]

# With filter(): keep the elements for which the lambda is truthy.
even_numbers = list(filter(lambda value: value % 2 == 0, numbers))
print(f"过滤偶数: {even_numbers}")  # [2, 4]

# With sorted(): the lambda supplies the sort key.
words = ['apple', 'banana', 'cherry', 'date']
sorted_by_length = sorted(words, key=lambda word: len(word))
print(f"按长度排序: {sorted_by_length}")  # ['date', 'apple', 'banana', 'cherry']
生成器函数
基本生成器
def countdown(n):
    """Yield ``n``, ``n - 1``, ... for as long as the value stays above zero."""
    remaining = n
    while remaining > 0:
        yield remaining
        remaining -= 1


# Consume the generator with a for loop.
for number in countdown(5):
    print(number)  # 5, 4, 3, 2, 1

# Drive the generator by hand with next().
counter = countdown(3)
print(next(counter))  # 3
print(next(counter))  # 2
print(next(counter))  # 1
# print(next(counter))  # would raise StopIteration
实用的生成器
def fibonacci(limit):
    """Yield the first ``limit`` Fibonacci numbers, starting 0, 1, 1, 2, ..."""
    current, following = 0, 1
    produced = 0
    while produced < limit:
        yield current
        produced += 1
        current, following = following, current + following
def read_large_file(file_path):
    """Yield the lines of a UTF-8 text file one at a time, whitespace-stripped.

    The file is iterated lazily and stays open only while the generator is
    being consumed, so large files are never read into memory at once.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        yield from (raw_line.strip() for raw_line in handle)
# Print the first ten Fibonacci numbers on a single line.
print("斐波那契数列:")
for num in fibonacci(10):
    print(num, end=' ')
print()
# Stand-in for reading a large file line by line.
def mock_large_file():
    """Yield five sample lines, numbered from 1."""
    for line_number in range(1, 6):
        yield f"这是第 {line_number} 行数据"


print("\n模拟文件读取:")
for line in mock_large_file():
    print(line)
错误处理
基本异常处理
def safe_divide(a, b):
    """Divide ``a`` by ``b`` and return a message describing the outcome.

    Returns:
        The formatted quotient on success, or an error message when ``b`` is
        zero or the operands cannot be divided.  A completion line is printed
        on every call, whatever the outcome.
    """
    try:
        quotient = a / b
    except ZeroDivisionError:
        message = "错误:除数不能为零"
    except TypeError:
        message = "错误:参数类型不正确"
    else:
        # Runs only when the division raised nothing.
        message = f"结果是: {quotient}"
    finally:
        # Runs on every path, including an exception that is not handled above.
        print("除法操作完成")
    return message


print(safe_divide(10, 2))  # 结果是: 5.0
print(safe_divide(10, 0))  # 错误:除数不能为零
print(safe_divide(10, 'a'))  # 错误:参数类型不正确
自定义异常
class NegativeNumberError(Exception):
    """Raised when a negative number is supplied where none is allowed.

    The offending number is kept in ``value`` and the formatted text in
    ``message``; the same text is the exception's string form.
    """

    def __init__(self, value):
        message = f"不允许负数: {value}"
        super().__init__(message)
        self.value = value
        self.message = message


def calculate_square_root(x):
    """Return the square root of ``x``.

    Raises:
        NegativeNumberError: if ``x`` is below zero.
    """
    if x < 0:
        raise NegativeNumberError(x)
    return x ** 0.5


try:
    result = calculate_square_root(9)
    print(f"平方根: {result}")
    result = calculate_square_root(-4)  # raises NegativeNumberError
except NegativeNumberError as error:
    print(f"捕获到自定义异常: {error}")
