Python正则表达式

感觉像是在重学 Python,太久没用了……幸好还有笔记。

正则表达式是什么

正则表达式是一个特殊的字符序列,用来检测一个字符串是否与我们所设定的这样的字符序列相匹配。
借助它可以快速检索文本,并实现一些替换文本的操作。

使用简单示例

1
2
3
4
5
6
7
8
9
10
11
12
13
import re

# Minimal example: use the re module to test whether a word occurs in a string.
a = 'python|java|C#'

# Plain-string alternative, kept for comparison. Note that str.index raises
# ValueError when the substring is absent, so it never actually yields -1
# (str.find is the method that returns -1).
# print(a.index('java') > -1)

# findall returns a list with every non-overlapping match of the pattern.
r = re.findall('python', a)
print(r)

# Rule: an empty result list means the pattern did not occur in the string.
if r:
    print('字符串中包含python~')
else:
    print('No')

正则表达式使用

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import re

# --- Literal characters vs. metacharacters ---
# 'python' consists of ordinary (literal) characters; r'\d' is a metacharacter.
# a = 'python0java2C#'
# r = re.findall(r'\d', a)
# print(r)  # ['0', '2']


# --- Character sets: [cdef] any of these, [^cdef] none of these, [a-f] a range ---
# s = 'abc, acc, adc, afc, ahc'
# r = re.findall('a[^fc]c', s)
# print(r)  # ['abc', 'adc', 'ahc']

# --- Predefined character classes ---
# \d a digit, same idea as [0-9]; \D any non-digit
# \w a word character, roughly [A-Za-z0-9_] (for str patterns it also covers
#    Unicode letters and digits unless re.ASCII is given)
# \W any non-word character, e.g. & \t \n \r or a space
# \s a whitespace character; \S a non-whitespace character
# .  any character except the newline \n
# a = 'python1111java678php'
# r = re.findall('[0-9]', a)
# print(r)


# --- Quantifiers ---
# {3,6} is greedy: it takes as many characters as possible (yields 'python').
# Appending ? makes it non-greedy: it stops at the minimum of 3 characters.
# a = 'python 1111java678php'
# r = re.findall('[a-z]{3,6}?', a)
# print(r)  # ['pyt', 'hon', 'jav', 'php']

# * matches the preceding item zero or more times
# a = 'pytho0python1pythonn2'
# r = re.findall('python*', a)
# print(r)  # ['pytho', 'python', 'pythonn']

# + matches the preceding item one or more times
# a = 'pytho0python1pythonn2'
# r = re.findall('python+', a)
# print(r)  # ['python', 'pythonn']

# ? matches the preceding item zero times or once
# a = 'pytho0python1pythonn2'
# r = re.findall('python?', a)
# print(r)  # ['pytho', 'python', 'python']

# --- Anchors: ^ start of string, $ end of string ---
# Example: the whole value must be 4 to 8 digits long.
# qq = '10000000001'
# r = re.findall(r'^\d{4,8}$', qq)
# r = re.findall('000$', qq)
# print(r)

# --- Groups: (...) makes the quantifier apply to the whole word ---
# a = 'pythonpythonpythonpython'
# r = re.findall('(python){3}', a)
# print(r)

# --- Flags ---
# re.I (re.IGNORECASE): ignore case
# re.S (re.DOTALL): let '.' match every character, the newline included
# langue = 'PythonC#JavaPHP'
# r = re.findall('c#', langue, re.I)

# Combine both flags with |: case-insensitive 'c#' followed by exactly one
# character, which here is the newline thanks to DOTALL.
langue = 'PythonC#\nJavaPHP'
r = re.compile('c#.{1}', re.IGNORECASE | re.DOTALL).findall(langue)
print(r)

字符串替换函数使用

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import re

# re.sub(pattern, repl, string, count=0) returns a new string.
# count=0 replaces every match; count=1 replaces only the first one.
# The input string itself is left unchanged.
# langue = 'PythonC#JavaPHP'
# r = re.sub('C#', 'GO', langue, count=0)
# print(r)
# print(langue)

# repl may also be a function: it is called with each match object and
# must return the replacement text.
# def convert(value):
#     print(value)  # a match object with span=(6, 8), match='C#'
#     matched = value.group()
#     return '!!' + matched + '!!'

# langue = 'PythonC#JavaPHP'
# r = re.sub('C#', convert, langue, count=0)
# print(r)  # Python!!C#!!JavaPHP

s = 'A8C37232D86'


def convert(value):
    """Return the replacement for one matched digit.

    Args:
        value: match object for a single digit, as supplied by re.sub.

    Returns:
        '9' when the digit is 6 or greater, otherwise '0'.
    """
    matched = value.group()
    return '9' if int(matched) >= 6 else '0'


# A raw string keeps the backslash of \d from being read as a string escape.
r = re.sub(r'\d', convert, s)
print(r)

search(),match()函数使用

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import re

# s = '8C37232D86'

# re.match only tries to match at the very beginning of the string.
# It returns None when the string does not start with the pattern.
# r = re.match(r'\d', s)
# print(r)
# print(r.span())  # (start, end) position of the match

# re.search scans through the whole string and returns the first match.
# r1 = re.search(r'\d', s)
# print(r1)
# print(r1.group())  # the matched text

# re.findall returns all matches as a list.
# r2 = re.findall(r'\d', s)
# print(r2)

# s1 = 'life is short,i use python'
# r3 = re.search('life(.*)python', s1)
# print(r3.group(1))  # group 1 is the first (...) group; group 0 is the full match

# With one capturing group, findall returns the group contents, not the full match.
# r4 = re.findall('life(.*)python', s1)
# print(r4)

# s2 = 'life is short,i use python,i love python'
# r5 = re.search('life(.*)python(.*)python', s2)
# print(r5.group(0))
# print(r5.group(1))
# print(r5.group(2))
# print(r5.group(0,1,2))  # several group numbers at once return a tuple
# print(r5.groups())  # tuple with the contents of every capturing group