正则表达式

参考 https://deerchao.cn/tutorials/regex/regex.htm

常用的操作

使用 re.sub() 进行替换（替换参数可以是一个函数）

示例 1：把字符串中的每一段数字都加上 111。
import re

# Sample text containing several runs of digits to be rewritten.
inputStr = 'hello 234 world 567 额外rwe2121'


def _add111(matched):
    """Return the replacement text for one match: the matched number plus 111.

    Args:
        matched: Match object exposing a named group ``number`` that holds
            a run of digits.

    Returns:
        The decimal string of ``int(number) + 111``.
    """
    return str(int(matched.group("number")) + 111)


# re.sub() calls _add111 once per non-overlapping match and splices its
# return value into the result.  The pattern is a raw string so that ``\d``
# reaches the regex engine unchanged instead of being parsed as a (invalid)
# string escape.
replacedStr = re.sub(r"(?P<number>\d+)", _add111, inputStr)
print(replacedStr)

示例 2：在替换函数中取出命名分组 page，并在其后追加 "_{}"。

def replace(matched):
    """Return the replacement text for one match: the ``page`` group plus ``"_{}"``.

    Args:
        matched: Match object exposing a named group ``page``.

    Returns:
        The text captured by ``page`` with the literal suffix ``"_{}"``
        appended.
    """
    # The captured group is already a string, so no extra str() conversion
    # is needed before returning it.
    return matched.group("page") + "_{}"


# Rewrite every "<word char>/<page>.js" occurrence in inputStr with the text
# returned by replace(); the whole match (leading character, slash and
# extension included) is substituted.  The dot is escaped so only a literal
# ".js" suffix matches -- unescaped, it would accept any character there.
# NOTE: inputStr must already be defined when this statement runs.
replacedStr = re.sub(r"\w/(?P<page>\w+)\.js", replace, inputStr)
print(replacedStr)

示例 3：把 URL 中 pageNum 参数的值加 1，得到下一页的地址。

import re

# URL whose ``pageNum`` query parameter is to be advanced by one.
inputStr = 'http://www.tvibe.cn/revision/play/album?albumId=20365308&pageNum=2&sort=1&pageSize=30'


def _next_page(matched):
    """Return ``pageNum=<n+1>`` for a match whose ``number`` group holds ``n``.

    Args:
        matched: Match object exposing a named group ``number`` that holds
            a run of digits.

    Returns:
        The string ``"pageNum="`` followed by the incremented number.
    """
    # The pattern consumes the "pageNum=" prefix as part of the match, so it
    # has to be written back together with the new value.
    return "pageNum=" + str(int(matched.group("number")) + 1)


# Raw string so that ``\d`` is passed to the regex engine verbatim.
next_page_url = re.sub(r"pageNum=(?P<number>\d+)", _next_page, inputStr)
print(next_page_url)

零宽断言

示例：四种零宽断言 (?=…)、(?<=…)、(?!…)、(?<!…) 配合 findall 的匹配结果。
import re

info = """
No waywanteh eetehsdd kkl
"""
# Each pattern is a raw string so the backslash escapes reach the regex
# engine untouched.  The trailing comment on each line is what
# re.findall(<pattern>, info) returns for the text above.
# Positive lookahead: a word-character run that is followed by "e".
regex1 = r"\w+(?=e)"  # ['waywant', 'eet']
# Positive lookbehind: a word-character run whose last character is "e".
regex2 = r"\w+(?<=e)"  # ['waywante', 'eete']
# Negative lookahead: a whole word that does not start with "way".
regex3 = r"\b(?!way)\w+"  # ['No', 'eetehsdd', 'kkl']
# Negative lookbehind: a run ending at a word boundary that does not end in "eh".
regex4 = r"\w+(?<!eh)\b"  # ['No', 'eetehsdd', 'kkl']
# Only regex3 is actually executed; swap in another pattern to try it.
pattern = re.compile(regex3, re.M)
result = pattern.findall(info)
print(result)