Python學習筆記-正規表達式

#正規表達式
#建立正規表達式對象
import re #導入庫
# re.compile() takes a string holding the regular expression and returns a
# Regex pattern object (called simply a "Regex object" in these notes).
NumRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')


# Matching with a Regex object
# search() scans the string it is given and returns a Match object for the
# first place the pattern matches, or None when the pattern is not found.
# Match.group() returns the text that actually matched (groups are explained
# later in these notes).
num = re.compile(r'\d{3}-\d{3}-\d{4}')
mm = num.search('My number 877-989-5555 is 455-666-9999,999-555-8888.')
print('Phone number is :', mm.group(0), sep='')

# Recap:
# 1. Import the regular-expression module with `import re`.
# 2. Build a Regex object with re.compile() (remember to use a raw string).
# 3. Pass the string to be searched to the Regex object's search() method;
#    it returns a Match object.
# 4. Call the Match object's group() method to get the matched text.

# Matching more patterns with regular expressions
# Grouping with parentheses: every (...) in the pattern becomes a numbered
# group on the Match object.
nn = re.compile(r'(\d\d\d)-(\d\d\d-\d\d\d\d)')
mu = nn.search('My number id 455-111-9898')
print((mu.group(1), mu.group(2)))  # both groups, printed as a tuple
print(mu.group(2))                 # the second group only
print(mu.group())                  # no argument means group 0 ...
print(mu.group(0))                 # ... which is the whole match

# Matching one of several alternatives with the pipe
# Use findall() to find every match; search() only reports the first one.
# To match a literal pipe character, escape it with a backslash: \|
#
# Whitespace inside a pattern is significant unless re.VERBOSE is given, so
# the alternatives are written without padding.  (r'a | b' would match the
# two-character strings 'a ' or ' b', not the letters themselves.)
zx = re.compile(r'a|b')
mo1 = zx.search('a and b')
print(mo1.group())

# The pipe can also be used inside a group so that only part of the pattern
# varies; group(1) then tells which alternative matched.
zxx = re.compile(r'Bat(ds|cc|yeyr)')
mo2 = zxx.search('Batcc and fuck')
print(mo2.group())
print(mo2.group(1))

# Optional matching with the question mark
# A ? marks the group just before it as optional (zero or one occurrence).
# NOTE: | binds more loosely than everything else in a pattern, so the one
# below reads as "Mo(dd)?" OR "fddk"; the second search therefore stops
# after 'Mo'.
tt = re.compile(r'Mo(dd)?|fddk')
pp1 = tt.search('The Adventures of Modd')
pp2 = tt.search('The Adventures of Mofddk')
print(pp1.group(), pp2.group(), sep='\n')

# An optional area code: both the long and the short number match.
phoneRegex = re.compile(r'(\d\d\d-)?\d\d\d-\d\d\d\d')
go1 = phoneRegex.search('My number is 415-555-4242')
go2 = phoneRegex.search('My number is 555-4242')
print(go1.group(0))
print(go2.group(0))

# Zero or more with the star
# * means "match zero or more times": the group just before the star may
# occur any number of times in the text, including not at all.
aao1 = re.compile(r'Bo(xo)*xoo')
pp3 = aao1.search('The Boxoo')            # group repeated zero times
pp4 = aao1.search('The Boxoxoo')          # group repeated once
pp5 = aao1.search('The Boxoxoxoxoxoxoo')  # group repeated several times
print(pp3.group(), pp4.group(), pp5.group(), sep='\n')

# One or more with the plus
# Where * means "zero or more", + means "one or more": the group before the
# plus has to appear at least once.
aao2 = re.compile(r'To(xo)+xoo')
pp6 = aao2.search('The Toxoxooo')
print(pp6.group(0))
pp8 = aao2.search('The Toxoxoxoxoxoxoo')
print(pp8.group(0))
# Counter-example (left disabled): aao2.search('The Toxoo') returns None
# because the group would have to repeat zero times, so calling .group() on
# that result would raise AttributeError.

# Exact repetition counts with braces
aao3 = re.compile(r'(ha){3}')
pp9 = aao3.search('The hahaha')
print(pp9.group(0))
# Counter-example (left disabled): aao3.search('The haha') returns None since
# only two repetitions are present, so .group() on it would raise
# AttributeError.

# Greedy versus non-greedy matching
# {3,6} is greedy: it takes as many repetitions as are available, up to six.
aao4 = re.compile(r'(ha){3,6}')
ppp1 = aao4.search('The hahahahaha')

# A trailing ? makes the same quantifier non-greedy: it stops at the minimum
# of three repetitions.
aao5 = re.compile(r'(ha){3,6}?')
ppp2 = aao5.search('The hahahahaha')

print(ppp1.group(), ppp2.group(), sep='\n')

# The findall() method
# Besides search(), Regex objects also have findall().  search() returns a
# Match object for the first match in the searched string, whereas findall()
# returns the strings of every match in it.
aao6 = re.compile(r'\d{3}-\d{3}-\d{4}')
print(aao6.findall(string='call 123-456-7897 and 444-555-6666'))

aao7 = re.compile(r'(\d{3})-(\d{3})-(\d{4})')
print(aao7.findall(string='call 123-456-7897 and 444-555-6666'))

# Summary:
# 1. Called on a pattern without groups, e.g. \d\d\d-\d\d\d-\d\d\d\d,
#    findall() returns a list of matched strings, such as
#    ['123-456-7897', '444-555-6666'].
# 2. Called on a pattern with groups, e.g. (\d\d\d)-(\d\d\d)-(\d\d\d\d),
#    findall() returns a list of tuples of strings (one string per group),
#    such as [('123', '456', '7897'), ('444', '555', '6666')].

# Character classes
#   \d  any digit 0-9                      \D  any character that is not a digit
#   \w  any letter, digit or underscore    \W  any character that is not one of those
#       (think of it as a "word" character)
#   \s  space, tab or newline              \S  any character that is not one of those
#       (think of it as "whitespace")
# Building your own character class: list the accepted characters in [...]
value = re.compile(r'[abcABC]')
print(value.findall(string='dhascadkaadcmlaclaancxbcchaACBkcBdCidfA'))

# Caret and dollar: ^ anchors a match at the beginning, $ at the end.

# The . (dot) character in a regular expression is called the "wildcard".
# It stands for exactly one character, which is why 'flat' contributes 'lat'.
atRegex = re.compile(r'.at')
print(atRegex.findall(string='The cat in the hat sat on the flat mat.'))

# Matching everything with dot-star
nameRegex = re.compile(r'First Name: (.*) Last Name: (.*)')
jj = nameRegex.search('First Name: Al Last Name: Sweigart')
print(jj.group(0))

# Dot-star is greedy: it always matches as much text as it can.  To match in
# non-greedy mode, follow the dot-star with a question mark.
ftx1 = re.compile(r'<.*?>')   # non-greedy: stops at the first '>'
tx = ftx1.search('<To dj> and <cx sd>')
ftx2 = re.compile(r'<.*>')    # greedy: runs on to the last '>'
tx1 = ftx2.search('<dgdgds> for <dfsdfdsf>')
print(tx.group(), tx1.group(), sep='\n')

# Matching newlines with the dot
# Dot-star matches every character except a newline.  Passing re.DOTALL
# (alias re.S) as the second argument to re.compile() lets the dot match
# every character, newlines included.
noNewlineRegex = re.compile('.*')
print(noNewlineRegex.search('Serve the public trust.\nProtect the innocent.'
                            '\nUphold the law.').group(0))
newlineRegex1 = re.compile('.*', flags=re.S)
print(newlineRegex1.search('Serve the public trust.\nProtect the innocent.'
                           '\nUphold the law.').group(0))
# Case-insensitive matching
# Pass re.IGNORECASE (or its short alias re.I) as the second argument to
# re.compile() to make the pattern ignore letter case.
robocop = re.compile(r'robocop', flags=re.IGNORECASE)
print(robocop.search('RoboCop is part man, part machine, all cop.').group(0))


# Replacing text with the sub() method
# A Regex object's sub() method is called with two arguments: first the
# replacement string that takes the place of each match, then the string in
# which the pattern is searched.
# Sometimes the matched text itself is needed as part of the replacement.
# In the first argument, \1, \2, \3 ... stand for "insert the text of group
# 1, 2, 3 ... here".
namesRegex2 = re.compile(r'Agent \w+')
print(namesRegex2.sub(repl='CENSORED',
                      string='Agent Alice gave the secret documents to Agent Bob.'))
agentNamesRegex = re.compile(r'Agent (\w)\w*')
print(agentNamesRegex.sub(repl=r'\1****',
                          string='Agent Alice told Agent Carol that Agent Eve knew Agent Bob was a double agent.'))

# Managing complex regexes: pass re.VERBOSE as the second argument to
# re.compile().

# Combining re.IGNORECASE, re.DOTALL and re.VERBOSE
# The flags are combined with the pipe character (|, bitwise or).  The short
# aliases used here are re.I, re.S and re.X respectively.
someRegexValue1 = re.compile('foo', re.I | re.S)
someRegexValue2 = re.compile('foo', re.I | re.S | re.X)

Python學習筆記-正規表達式

繼續閱讀

無法解析的外部符号 wmain，該符号在函數 "void __cdecl mainCRTStartupHelper(struct HINSTANCE__ *,unsigned short con......

TestLink導出用例轉換工具(XML2Excel)

YAML簡介和PyYAML安全操作：YAML支援的類型、YAML的優點、yaml的基本文法、python操作

Small tricks

libsvm for python 安裝

學習軟體測試基礎測試第七天

Zeppelin 配置通路 REST API（Apache Zeppelin Configuration REST API）

【Torch】最簡潔logging使用指南

27. Remove Element(清單)題目代碼

neo4j之cypher使用文檔

Cloud Studio初體驗

使用 ctypes 進行 Python 和 C 的混合程式設計

【python】【資料處理】畫多元資料分布圖

【python】netconf協定對接管理裝置

「Python 網絡自動化」NETCONF —— Python 使用 NETCONF 管理配置 H3C 網絡裝置

在python中建立excel并寫入