#!/usr/local/bin/python
"""Regular-expression helpers that treat two-byte characters as units.

A "two-byte character" here is a pair of characters that both lie in the
range EucStart..EucEnd (0xA1..0xFE); a "single-byte character" lies in
AsciiStart..AsciiEnd (0x01..0x7F).  User patterns are rewritten so that

* ``.`` matches one whole character (single-byte or two-byte),
* a character class containing two-byte characters or two-byte ranges is
  expanded into an equivalent alternation, and
* a literal two-byte character is wrapped in a group so that a following
  quantifier applies to both of its bytes.

Matches are only reported on character boundaries, so the second byte of
one character followed by the first byte of the next never matches.

The code works on the native ``str`` type.  Under Python 3 this means the
subject and the pattern must hold one code point per encoded byte
(NOTE(review): e.g. bytes decoded as latin-1 -- confirm against callers).

Known limitations kept from the original implementation: a ``.`` inside a
character class is expanded as well, negated classes (``[^...]``) that
contain two-byte characters are not supported, and every literal two-byte
character adds a capturing group to the generated pattern.
"""

import re
import string

AsciiStart = 0x01
AsciiEnd = 0x7F
EucStart = 0xA1
EucEnd = 0xFE

cas = cAsciiStart = chr(AsciiStart)
cae = cAsciiEnd = chr(AsciiEnd)
ces = cEucStart = chr(EucStart)
cee = cEucEnd = chr(EucEnd)

# Regex fragments matching exactly one single-byte / one two-byte character.
ac = AsciiChar = "[%s-%s]" % (cas, cae)
ec = EucChar = "(?:[%s-%s][%s-%s])" % (ces, cee, ces, cee)
ecng = EucChar

# One whole character of either width; this is what "." expands to.
_ANY_CHAR = "(?:%s|%s)" % (ac, ec)

# "\x" escape pairs are matched first so that only unescaped dots expand.
_DOT_TOKEN_RE = re.compile(r"\\.|\.", re.DOTALL)
# A bracketed class made only of single-byte and two-byte characters.
_CLASS_RE = re.compile(r"\[(%s|%s)*?\]" % (ac, ec))
# A class whose content up to the closing "]" is entirely single-byte.
_ASCII_CLASS_RE = re.compile(r"%s*?\]" % ac)
_EUC_RANGE_RE = re.compile("(%s)-(%s)" % (ec, ec))
_EUC_CHAR_RE = re.compile(ec)
_ASCII_RUN_RE = re.compile("%s+" % ac)


def _expand_euc_range(first, last):
    """Return an alternation matching the two-byte range first..last."""
    lead1, trail1 = first[0], first[1]
    lead2, trail2 = last[0], last[1]
    if lead1 == lead2:
        # [uy-uz] -> u[y-z]
        return "(?:%s[%s-%s])" % (lead1, trail1, trail2)
    # [xb-zx] -> x[b-END]|y[START-END]|z[START-x]
    rows = ["(?:%s[%s-%s])" % (lead1, trail1, cee)]
    for lead in range(ord(lead1) + 1, ord(lead2)):
        rows.append("(?:%s[%s-%s])" % (chr(lead), ces, cee))
    rows.append("(?:%s[%s-%s])" % (lead2, ces, trail2))
    return "|".join(rows)


def scfunc(m):
    """Rewrite one character class so two-byte members match as units.

    ``m`` is a match object whose ``group()`` is a bracketed class such as
    ``[abc]``.  A class with only single-byte members is returned
    unchanged; otherwise the class is turned into a non-capturing
    alternation of two-byte ranges, single two-byte characters and
    single-byte sub-classes.

    Raises ValueError if the class contains a character that is neither
    single-byte nor part of a two-byte character (the original code would
    loop forever on such input).
    """
    cls = m.group()
    # No multi-byte member: return the class as it is.
    if _ASCII_CLASS_RE.match(cls):
        return cls

    # Multi-byte members present: expand the class piece by piece.
    rest = cls[1:-1]
    alternatives = []
    while rest:
        # Range between two two-byte characters.
        rng = _EUC_RANGE_RE.match(rest)
        if rng:
            alternatives.append(
                _expand_euc_range(rng.group(1), rng.group(2)))
            rest = rest[rng.end():]
            continue
        # A single two-byte character.
        single = _EUC_CHAR_RE.match(rest)
        if single:
            alternatives.append("(?:" + single.group() + ")")
            rest = rest[single.end():]
            continue
        # A run of single-byte characters becomes its own class.
        run = _ASCII_RUN_RE.match(rest)
        if not run:
            raise ValueError("unsupported character in class %r" % cls)
        alternatives.append("[" + run.group() + "]")
        rest = rest[run.end():]
    return "(?:" + "|".join(alternatives) + ")"


def makematchpattern(pattern):
    """Return a pattern that locates ``pattern`` on a character boundary.

    The result has the form ``^<any char>*?(<pattern>)`` (plus ``$`` when
    the input ended with ``$``), so group 1 spans the user's pattern and
    everything before it is consumed as whole characters.  A leading
    ``^`` of the input is dropped here; callers that need the anchor
    re-check with makesearchpattern().
    """
    # End of line.
    eol = bool(re.search(r"\$$", pattern))
    if eol:
        pattern = pattern[:-1]
    # Beginning of line.
    if pattern.startswith("^"):
        pattern = pattern[1:]
    pattern = makesearchpattern("^.*?(" + pattern + ")")
    if eol:
        pattern = pattern + "$"
    return pattern


def _expand_dot(m):
    """Expand an unescaped "." and leave backslash escapes untouched."""
    token = m.group()
    if token == ".":
        return _ANY_CHAR
    return token


def makesearchpattern(pattern):
    """Rewrite ``pattern`` so it is aware of two-byte characters.

    Unescaped dots are expanded to "one whole character", character
    classes are passed through scfunc(), and each literal two-byte
    character is wrapped in a (capturing) group.  A trailing ``$`` is
    kept at the end of the result.
    """
    # End of line.
    eol = bool(re.search(r"\$$", pattern))
    if eol:
        pattern = pattern[:-1]
    # Multi-byte '.'; an escaped "\." stays a literal dot.
    pattern = _DOT_TOKEN_RE.sub(_expand_dot, pattern)
    # Character classes: pick up the ones that may hold two-byte chars.
    pattern = _CLASS_RE.sub(scfunc, pattern)
    # Literal multi-byte characters.
    pattern = _EUC_CHAR_RE.sub(
        lambda mobj: "(" + mobj.group(0) + ")", pattern)
    if eol:
        pattern = pattern + "$"
    return pattern


def _compilepair(pattern, flags=0):
    """Compile the (locator, matcher) pair used for aligned searching.

    The locator consumes whole characters before the pattern and exposes
    the pattern's start as group 1; the matcher is the rewritten pattern
    itself.
    """
    # makematchpattern() always emits a leading "^".  It is dropped here
    # because locator.match(string, pos) already anchors at ``pos``,
    # whereas "^" would only match at the real start of the string.
    locator = re.compile(makematchpattern(pattern)[1:], flags)
    matcher = re.compile(makesearchpattern(pattern), flags)
    return locator, matcher


def _searchfrom(locator, matcher, string, pos):
    """Return the first character-aligned match at or after ``pos``.

    ``pos`` must itself be a character boundary.
    """
    found = locator.match(string, pos)
    if found is None:
        return None
    return matcher.search(string, found.start(1))


def _itermatches(pattern, string):
    """Yield successive non-overlapping, character-aligned matches."""
    locator, matcher = _compilepair(pattern)
    pos = 0
    while pos <= len(string):
        m = _searchfrom(locator, matcher, string, pos)
        if m is None:
            return
        yield m
        pos = m.end()
        if m.end() == m.start():
            # Empty match: step over one whole character, otherwise the
            # same empty match would be found again forever.
            if _EUC_CHAR_RE.match(string, pos):
                pos = pos + 2
            else:
                pos = pos + 1


def match(pattern, string, flags=None):
    """Apply re.match() with the pattern from makematchpattern().

    Group 1 of the returned match object spans the user's pattern;
    group 0 also includes the characters skipped before it.
    """
    return re.match(makematchpattern(pattern), string, flags or 0)


def search(pattern, string, flags=None):
    """Return the first character-aligned match of ``pattern`` or None.

    ``flags`` are regular ``re`` flags and apply to the whole search.
    """
    locator, matcher = _compilepair(pattern, flags or 0)
    return _searchfrom(locator, matcher, string, 0)


def findall(pattern, string):
    """Return the text of every aligned, non-overlapping match."""
    return [m.group() for m in _itermatches(pattern, string)]


def split(pattern, string, maxsplit=0):
    """Split ``string`` at matches of ``pattern``; drop empty pieces.

    At most ``maxsplit`` matches are used as separators; 0 means no
    limit.
    """
    pieces = []
    spos = 0
    nsplit = 0
    for m in _itermatches(pattern, string):
        piece = string[spos:m.start()]
        if piece:
            pieces.append(piece)
        nsplit = nsplit + 1
        spos = m.end()
        if nsplit == maxsplit:
            break
    if string[spos:]:
        pieces.append(string[spos:])
    return pieces


def sub(pattern, repl, string, count=0):
    """Return ``string`` with matches replaced; see subn()."""
    return subn(pattern, repl, string, count)[0]


def subn(pattern, repl, string, count=0):
    """Replace matches of ``pattern`` and return (new_string, n_subs).

    ``repl`` is either a string, inserted literally (no backslash or
    group-reference processing), or a callable taking the match object
    and returning the replacement text.  At most ``count`` replacements
    are made; 0 means no limit.

    Raises TypeError if a match is found and ``repl`` is neither a
    string nor callable.
    """
    out = []
    spos = 0
    nsub = 0
    for m in _itermatches(pattern, string):
        out.append(string[spos:m.start()])
        if isinstance(repl, str):
            out.append(repl)
        elif callable(repl):
            out.append(repl(m))
        else:
            raise TypeError("repl must be function or string.")
        nsub = nsub + 1
        spos = m.end()
        if nsub == count:
            break
    out.append(string[spos:])
    return ("".join(out), nsub)


def compile(pattern, flags=None):
    """Unsupported: report that patterns cannot be precompiled."""
    print("kre cannot compile")
    return None