[{"data":1,"prerenderedAt":963},["ShallowReactive",2],{"blog-\u002Fblog\u002Fpython-regex-explained":3},{"id":4,"title":5,"body":6,"description":949,"difficulty":950,"extension":951,"framework":952,"frameworkSlug":58,"meta":953,"navigation":80,"order":66,"path":954,"qaPath":955,"seo":956,"stem":957,"subtopic":958,"topic":959,"topicSlug":960,"updated":961,"__hash__":962},"blog\u002Fblog\u002Fpython-regex-explained.md","Python Regular Expressions Explained — The re Module, Groups, and Common Patterns",{"type":7,"value":8,"toc":939},"minimark",[9,14,26,30,53,134,140,144,175,257,275,279,302,459,463,473,580,584,602,747,751,766,832,839,876,880,935],[10,11,13],"h2",{"id":12},"python-regular-expressions-explained","Python regular expressions, explained",[15,16,17,18,22,23,25],"p",{},"Regular expressions match patterns in text, and Python's ",[19,20,21],"code",{},"re"," module is the standard tool.\nThe hard part isn't the regex syntax itself but knowing which ",[19,24,21],{}," function to call and how\ngroups work. This guide covers the everyday API and the traps that catch people.",[10,27,29],{"id":28},"always-use-raw-strings","Always use raw strings",[15,31,32,33,36,37,36,40,43,44,48,49,52],{},"Regex uses backslashes heavily (",[19,34,35],{},"\\d",", ",[19,38,39],{},"\\b",[19,41,42],{},"\\w","), and so do Python string escapes. Write\npatterns as ",[45,46,47],"strong",{},"raw strings"," (",[19,50,51],{},"r\"...\"",") so the backslashes reach the regex engine intact.",[54,55,60],"pre",{"className":56,"code":57,"language":58,"meta":59,"style":59},"language-python shiki shiki-themes github-light github-dark","import re\n\nre.search(r\"\\bword\\b\", text)    # raw string — \\b is a word boundary\nre.search(\"\\bword\\b\", text)     # bug! 
\\b is a backspace char to Python first\n","python","",[19,61,62,75,82,113],{"__ignoreMap":59},[63,64,67,71],"span",{"class":65,"line":66},"line",1,[63,68,70],{"class":69},"szBVR","import",[63,72,74],{"class":73},"sVt8B"," re\n",[63,76,78],{"class":65,"line":77},2,[63,79,81],{"emptyLinePlaceholder":80},true,"\n",[63,83,85,88,91,95,98,102,104,106,109],{"class":65,"line":84},3,[63,86,87],{"class":73},"re.search(",[63,89,90],{"class":69},"r",[63,92,94],{"class":93},"sZZnC","\"",[63,96,39],{"class":97},"sj4cs",[63,99,101],{"class":100},"sA_wV","word",[63,103,39],{"class":97},[63,105,94],{"class":93},[63,107,108],{"class":73},", text)    ",[63,110,112],{"class":111},"sJ8bj","# raw string — \\b is a word boundary\n",[63,114,116,118,120,122,124,126,128,131],{"class":65,"line":115},4,[63,117,87],{"class":73},[63,119,94],{"class":93},[63,121,39],{"class":97},[63,123,101],{"class":93},[63,125,39],{"class":97},[63,127,94],{"class":93},[63,129,130],{"class":73},", text)     ",[63,132,133],{"class":111},"# bug! \\b is a backspace char to Python first\n",[15,135,136,137,139],{},"Make ",[19,138,51],{}," a reflex for every pattern — it avoids a whole class of silent mismatches.",[10,141,143],{"id":142},"match-vs-search-vs-fullmatch","match vs search vs fullmatch",[15,145,146,147,151,152,155,156,36,159,162,163,166,167,170,171,174],{},"These three differ in ",[148,149,150],"em",{},"where"," they look. 
",[19,153,154],{},"match"," anchors at the ",[45,157,158],{},"start",[19,160,161],{},"search"," finds the\npattern ",[45,164,165],{},"anywhere",", and ",[19,168,169],{},"fullmatch"," requires the ",[45,172,173],{},"whole"," string to match.",[54,176,178],{"className":56,"code":177,"language":58,"meta":59,"style":59},"re.match(r\"\\d+\", \"123abc\")      # matches \"123\" — start only\nre.search(r\"\\d+\", \"abc123\")     # matches \"123\" — anywhere\nre.fullmatch(r\"\\d+\", \"123abc\")  # None — not the entire string\n",[19,179,180,207,232],{"__ignoreMap":59},[63,181,182,185,187,189,191,194,196,198,201,204],{"class":65,"line":66},[63,183,184],{"class":73},"re.match(",[63,186,90],{"class":69},[63,188,94],{"class":93},[63,190,35],{"class":97},[63,192,193],{"class":69},"+",[63,195,94],{"class":93},[63,197,36],{"class":73},[63,199,200],{"class":93},"\"123abc\"",[63,202,203],{"class":73},")      ",[63,205,206],{"class":111},"# matches \"123\" — start only\n",[63,208,209,211,213,215,217,219,221,223,226,229],{"class":65,"line":77},[63,210,87],{"class":73},[63,212,90],{"class":69},[63,214,94],{"class":93},[63,216,35],{"class":97},[63,218,193],{"class":69},[63,220,94],{"class":93},[63,222,36],{"class":73},[63,224,225],{"class":93},"\"abc123\"",[63,227,228],{"class":73},")     ",[63,230,231],{"class":111},"# matches \"123\" — anywhere\n",[63,233,234,237,239,241,243,245,247,249,251,254],{"class":65,"line":84},[63,235,236],{"class":73},"re.fullmatch(",[63,238,90],{"class":69},[63,240,94],{"class":93},[63,242,35],{"class":97},[63,244,193],{"class":69},[63,246,94],{"class":93},[63,248,36],{"class":73},[63,250,200],{"class":93},[63,252,253],{"class":73},")  ",[63,255,256],{"class":111},"# None — not the entire string\n",[15,258,259,260,263,264,267,268,271,272,274],{},"All return a ",[45,261,262],{},"match object"," (truthy) or ",[19,265,266],{},"None",", so they're used in ",[19,269,270],{},"if"," tests. 
A common\nbug is expecting ",[19,273,154],{}," to scan the whole string — it doesn't.",[10,276,278],{"id":277},"extracting-with-groups","Extracting with groups",[15,280,281,282,285,286,289,290,293,294,297,298,301],{},"Parentheses create ",[45,283,284],{},"capturing groups",". ",[19,287,288],{},".group(0)"," is the whole match; ",[19,291,292],{},".group(n)"," is the\nnth group. ",[45,295,296],{},"Named groups"," ",[19,299,300],{},"(?P\u003Cname>...)"," make the result self-documenting.",[54,303,305],{"className":56,"code":304,"language":58,"meta":59,"style":59},"m = re.search(r\"(\\d{4})-(\\d{2})-(\\d{2})\", \"date: 2026-06-19\")\nm.group(0)      # '2026-06-19'  — full match\nm.group(1)      # '2026'        — first group\nm.groups()      # ('2026', '06', '19')\n\nm = re.search(r\"(?P\u003Cyear>\\d{4})-(?P\u003Cmonth>\\d{2})\", \"2026-06\")\nm.group(\"year\") # '2026'\n",[19,306,307,359,372,384,392,397,445],{"__ignoreMap":59},[63,308,309,312,315,318,320,322,325,328,331,334,336,339,341,343,345,347,349,351,353,356],{"class":65,"line":66},[63,310,311],{"class":73},"m ",[63,313,314],{"class":69},"=",[63,316,317],{"class":73}," re.search(",[63,319,90],{"class":69},[63,321,94],{"class":93},[63,323,324],{"class":97},"(\\d",[63,326,327],{"class":69},"{4}",[63,329,330],{"class":97},")",[63,332,333],{"class":100},"-",[63,335,324],{"class":97},[63,337,338],{"class":69},"{2}",[63,340,330],{"class":97},[63,342,333],{"class":100},[63,344,324],{"class":97},[63,346,338],{"class":69},[63,348,330],{"class":97},[63,350,94],{"class":93},[63,352,36],{"class":73},[63,354,355],{"class":93},"\"date: 2026-06-19\"",[63,357,358],{"class":73},")\n",[63,360,361,364,367,369],{"class":65,"line":77},[63,362,363],{"class":73},"m.group(",[63,365,366],{"class":97},"0",[63,368,203],{"class":73},[63,370,371],{"class":111},"# '2026-06-19'  — full 
match\n",[63,373,374,376,379,381],{"class":65,"line":84},[63,375,363],{"class":73},[63,377,378],{"class":97},"1",[63,380,203],{"class":73},[63,382,383],{"class":111},"# '2026'        — first group\n",[63,385,386,389],{"class":65,"line":115},[63,387,388],{"class":73},"m.groups()      ",[63,390,391],{"class":111},"# ('2026', '06', '19')\n",[63,393,395],{"class":65,"line":394},5,[63,396,81],{"emptyLinePlaceholder":80},[63,398,400,402,404,406,408,410,413,417,419,421,423,425,427,430,432,434,436,438,440,443],{"class":65,"line":399},6,[63,401,311],{"class":73},[63,403,314],{"class":69},[63,405,317],{"class":73},[63,407,90],{"class":69},[63,409,94],{"class":93},[63,411,412],{"class":97},"(",[63,414,416],{"class":415},"s9eBZ","?P\u003Cyear>",[63,418,35],{"class":97},[63,420,327],{"class":69},[63,422,330],{"class":97},[63,424,333],{"class":100},[63,426,412],{"class":97},[63,428,429],{"class":415},"?P\u003Cmonth>",[63,431,35],{"class":97},[63,433,338],{"class":69},[63,435,330],{"class":97},[63,437,94],{"class":93},[63,439,36],{"class":73},[63,441,442],{"class":93},"\"2026-06\"",[63,444,358],{"class":73},[63,446,448,450,453,456],{"class":65,"line":447},7,[63,449,363],{"class":73},[63,451,452],{"class":93},"\"year\"",[63,454,455],{"class":73},") ",[63,457,458],{"class":111},"# '2026'\n",[10,460,462],{"id":461},"finding-all-matches","Finding all matches",[15,464,465,468,469,472],{},[19,466,467],{},"findall"," returns a list of full matches; if the pattern has groups it returns the groups\ninstead (plain strings for one group, tuples for several).\n",[19,470,471],{},"finditer"," yields match objects lazily, which is better when you need positions or the\ninput is large.",[54,474,476],{"className":56,"code":475,"language":58,"meta":59,"style":59},"re.findall(r\"\\d+\", \"a1 b22 c333\")           # ['1', '22', '333']\nre.findall(r\"(\\w)=(\\d)\", \"a=1 b=2\")          # [('a','1'), ('b','2')] — group tuples\n\nfor m in re.finditer(r\"\\d+\", \"a1 b22\"):\n    print(m.group(), m.start())              # value and 
index\n",[19,477,478,504,533,537,569],{"__ignoreMap":59},[63,479,480,483,485,487,489,491,493,495,498,501],{"class":65,"line":66},[63,481,482],{"class":73},"re.findall(",[63,484,90],{"class":69},[63,486,94],{"class":93},[63,488,35],{"class":97},[63,490,193],{"class":69},[63,492,94],{"class":93},[63,494,36],{"class":73},[63,496,497],{"class":93},"\"a1 b22 c333\"",[63,499,500],{"class":73},")           ",[63,502,503],{"class":111},"# ['1', '22', '333']\n",[63,505,506,508,510,512,515,517,520,522,524,527,530],{"class":65,"line":77},[63,507,482],{"class":73},[63,509,90],{"class":69},[63,511,94],{"class":93},[63,513,514],{"class":97},"(\\w)",[63,516,314],{"class":100},[63,518,519],{"class":97},"(\\d)",[63,521,94],{"class":93},[63,523,36],{"class":73},[63,525,526],{"class":93},"\"a=1 b=2\"",[63,528,529],{"class":73},")          ",[63,531,532],{"class":111},"# [('a','1'), ('b','2')] — group tuples\n",[63,534,535],{"class":65,"line":84},[63,536,81],{"emptyLinePlaceholder":80},[63,538,539,542,545,548,551,553,555,557,559,561,563,566],{"class":65,"line":115},[63,540,541],{"class":69},"for",[63,543,544],{"class":73}," m ",[63,546,547],{"class":69},"in",[63,549,550],{"class":73}," re.finditer(",[63,552,90],{"class":69},[63,554,94],{"class":93},[63,556,35],{"class":97},[63,558,193],{"class":69},[63,560,94],{"class":93},[63,562,36],{"class":73},[63,564,565],{"class":93},"\"a1 b22\"",[63,567,568],{"class":73},"):\n",[63,570,571,574,577],{"class":65,"line":394},[63,572,573],{"class":97},"    print",[63,575,576],{"class":73},"(m.group(), m.start())              ",[63,578,579],{"class":111},"# value and index\n",[10,581,583],{"id":582},"substituting-with-resub","Substituting with re.sub",[15,585,586,589,590,593,594,597,598,601],{},[19,587,588],{},"re.sub"," replaces matches. 
The replacement can reference groups (",[19,591,592],{},"\\1"," or ",[19,595,596],{},"\\g\u003Cname>",") or be a\n",[45,599,600],{},"function"," for computed replacements.",[54,603,605],{"className":56,"code":604,"language":58,"meta":59,"style":59},"re.sub(r\"\\s+\", \" \", \"too   many    spaces\")     # 'too many spaces'\nre.sub(r\"(\\d{4})-(\\d{2})\", r\"\\2\u002F\\1\", \"2026-06\")  # '06\u002F2026' — reorder groups\n\n# function replacement:\nre.sub(r\"\\d+\", lambda m: str(int(m.group()) * 2), \"a1 b2\")   # 'a2 b4'\n",[19,606,607,638,687,691,696],{"__ignoreMap":59},[63,608,609,612,614,616,619,621,623,625,628,630,633,635],{"class":65,"line":66},[63,610,611],{"class":73},"re.sub(",[63,613,90],{"class":69},[63,615,94],{"class":93},[63,617,618],{"class":97},"\\s",[63,620,193],{"class":69},[63,622,94],{"class":93},[63,624,36],{"class":73},[63,626,627],{"class":93},"\" \"",[63,629,36],{"class":73},[63,631,632],{"class":93},"\"too   many    spaces\"",[63,634,228],{"class":73},[63,636,637],{"class":111},"# 'too many spaces'\n",[63,639,640,642,644,646,648,650,652,654,656,658,660,662,664,666,668,671,674,676,678,680,682,684],{"class":65,"line":77},[63,641,611],{"class":73},[63,643,90],{"class":69},[63,645,94],{"class":93},[63,647,324],{"class":97},[63,649,327],{"class":69},[63,651,330],{"class":97},[63,653,333],{"class":100},[63,655,324],{"class":97},[63,657,338],{"class":69},[63,659,330],{"class":97},[63,661,94],{"class":93},[63,663,36],{"class":73},[63,665,90],{"class":69},[63,667,94],{"class":93},[63,669,670],{"class":415},"\\2",[63,672,673],{"class":100},"\u002F",[63,675,592],{"class":415},[63,677,94],{"class":93},[63,679,36],{"class":73},[63,681,442],{"class":93},[63,683,253],{"class":73},[63,685,686],{"class":111},"# '06\u002F2026' — reorder groups\n",[63,688,689],{"class":65,"line":84},[63,690,81],{"emptyLinePlaceholder":80},[63,692,693],{"class":65,"line":115},[63,694,695],{"class":111},"# function 
replacement:\n",[63,697,698,700,702,704,706,708,710,712,715,718,721,723,726,729,732,735,738,741,744],{"class":65,"line":394},[63,699,611],{"class":73},[63,701,90],{"class":69},[63,703,94],{"class":93},[63,705,35],{"class":97},[63,707,193],{"class":69},[63,709,94],{"class":93},[63,711,36],{"class":73},[63,713,714],{"class":69},"lambda",[63,716,717],{"class":73}," m: ",[63,719,720],{"class":97},"str",[63,722,412],{"class":73},[63,724,725],{"class":97},"int",[63,727,728],{"class":73},"(m.group()) ",[63,730,731],{"class":69},"*",[63,733,734],{"class":97}," 2",[63,736,737],{"class":73},"), ",[63,739,740],{"class":93},"\"a1 b2\"",[63,742,743],{"class":73},")   ",[63,745,746],{"class":111},"# 'a2 b4'\n",[10,748,750],{"id":749},"greedy-vs-non-greedy-and-compiling","Greedy vs non-greedy, and compiling",[15,752,753,754,757,758,761,762,765],{},"By default quantifiers are ",[45,755,756],{},"greedy"," — they match as much as possible. Add ",[19,759,760],{},"?"," to make them\n",[45,763,764],{},"lazy",". 
This is the most common \"why did it match too much\" bug.",[54,767,769],{"className":56,"code":768,"language":58,"meta":59,"style":59},"re.search(r\"\u003C.*>\", \"\u003Ca>\u003Cb>\").group()    # '\u003Ca>\u003Cb>' — greedy, grabs everything\nre.search(r\"\u003C.*?>\", \"\u003Ca>\u003Cb>\").group()   # '\u003Ca>'    — lazy, stops early\n",[19,770,771,803],{"__ignoreMap":59},[63,772,773,775,777,779,782,785,787,790,792,794,797,800],{"class":65,"line":66},[63,774,87],{"class":73},[63,776,90],{"class":69},[63,778,94],{"class":93},[63,780,781],{"class":100},"\u003C",[63,783,784],{"class":97},".",[63,786,731],{"class":69},[63,788,789],{"class":100},">",[63,791,94],{"class":93},[63,793,36],{"class":73},[63,795,796],{"class":93},"\"\u003Ca>\u003Cb>\"",[63,798,799],{"class":73},").group()    ",[63,801,802],{"class":111},"# '\u003Ca>\u003Cb>' — greedy, grabs everything\n",[63,804,805,807,809,811,813,815,818,820,822,824,826,829],{"class":65,"line":77},[63,806,87],{"class":73},[63,808,90],{"class":69},[63,810,94],{"class":93},[63,812,781],{"class":100},[63,814,784],{"class":97},[63,816,817],{"class":69},"*?",[63,819,789],{"class":100},[63,821,94],{"class":93},[63,823,36],{"class":73},[63,825,796],{"class":93},[63,827,828],{"class":73},").group()   ",[63,830,831],{"class":111},"# '\u003Ca>'    — lazy, stops early\n",[15,833,834,835,838],{},"When reusing a pattern many times (e.g. 
in a loop), ",[45,836,837],{},"compile"," it once for clarity and a\nsmall speedup:",[54,840,842],{"className":56,"code":841,"language":58,"meta":59,"style":59},"pat = re.compile(r\"\\d+\")\npat.findall(text)\npat.search(other)\n",[19,843,844,866,871],{"__ignoreMap":59},[63,845,846,849,851,854,856,858,860,862,864],{"class":65,"line":66},[63,847,848],{"class":73},"pat ",[63,850,314],{"class":69},[63,852,853],{"class":73}," re.compile(",[63,855,90],{"class":69},[63,857,94],{"class":93},[63,859,35],{"class":97},[63,861,193],{"class":69},[63,863,94],{"class":93},[63,865,358],{"class":73},[63,867,868],{"class":65,"line":77},[63,869,870],{"class":73},"pat.findall(text)\n",[63,872,873],{"class":65,"line":84},[63,874,875],{"class":73},"pat.search(other)\n",[10,877,879],{"id":878},"recap","Recap",[15,881,882,883,48,885,887,888,892,893,897,898,902,903,905,906,909,910,912,913,673,917,921,922,926,927,929,930,166,932,934],{},"Use ",[45,884,47],{},[19,886,51],{},") for every pattern. Pick the right function: ",[45,889,890],{},[19,891,154],{},"\n(start), ",[45,894,895],{},[19,896,161],{}," (anywhere), ",[45,899,900],{},[19,901,169],{}," (whole string) — all return a match object\nor ",[19,904,266],{},". 
Capture data with ",[45,907,908],{},"groups"," (positional or ",[19,911,300],{},"), get every hit with\n",[45,914,915],{},[19,916,467],{},[45,918,919],{},[19,920,471],{},", and rewrite text with ",[45,923,924],{},[19,925,588],{}," (group refs or a function).\nRemember quantifiers are ",[45,928,756],{}," unless you add ",[19,931,760],{},[45,933,837],{}," patterns you reuse.",[936,937,938],"style",{},"html pre.shiki code .szBVR, html code.shiki .szBVR{--shiki-default:#D73A49;--shiki-dark:#F97583}html pre.shiki code .sVt8B, html code.shiki .sVt8B{--shiki-default:#24292E;--shiki-dark:#E1E4E8}html pre.shiki code .sZZnC, html code.shiki .sZZnC{--shiki-default:#032F62;--shiki-dark:#9ECBFF}html pre.shiki code .sj4cs, html code.shiki .sj4cs{--shiki-default:#005CC5;--shiki-dark:#79B8FF}html pre.shiki code .sA_wV, html code.shiki .sA_wV{--shiki-default:#032F62;--shiki-dark:#DBEDFF}html pre.shiki code .sJ8bj, html code.shiki .sJ8bj{--shiki-default:#6A737D;--shiki-dark:#6A737D}html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html pre.shiki code .s9eBZ, html code.shiki 
.s9eBZ{--shiki-default:#22863A;--shiki-dark:#85E89D}",{"title":59,"searchDepth":77,"depth":77,"links":940},[941,942,943,944,945,946,947,948],{"id":12,"depth":77,"text":13},{"id":28,"depth":77,"text":29},{"id":142,"depth":77,"text":143},{"id":277,"depth":77,"text":278},{"id":461,"depth":77,"text":462},{"id":582,"depth":77,"text":583},{"id":749,"depth":77,"text":750},{"id":878,"depth":77,"text":879},"How to use Python's re module — match vs search vs findall, capturing and named groups, substitution with re.sub, compiling patterns, and why raw strings and non-greedy matching matter.","medium","md","Python",{},"\u002Fblog\u002Fpython-regex-explained","\u002Fpython\u002Fstdlib\u002Fregex",{"title":5,"description":949},"blog\u002Fpython-regex-explained","Regular Expressions","Standard Library Essentials","stdlib","2026-06-19","RDUvm_XIMW5b0qTx7oE0QZZbhyDQNRYi0MbgXmG5Dg4",1782244092319]