202 lines
5.9 KiB
Plaintext
202 lines
5.9 KiB
Plaintext
{*******************************************************}
|
|
{ }
|
|
{ Tocsg.PCRE }
|
|
{ }
|
|
{ Copyright (C) 2022 sunk }
|
|
{ }
|
|
{*******************************************************}
|
|
|
|
unit Tocsg.PCRE;
|
|
|
|
interface
|
|
|
|
uses
|
|
Tocsg.Obj, Classes, SysUtils,
|
|
WinApi.Windows, System.RegularExpressions;
|
|
|
|
const
|
|
PT_RX_IDNUM = '\d{2}(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])\W?[1-4]\d{6}'; // or \d{2}(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])-[1-4]\d{7}
|
|
//3 : JCB나 다이너스, 4 : 비자, 5 : 마스터, 6 : 중국의 은련 카드, 9 : 세계 공통 국내 전용카드
|
|
PT_RX_CARDNUM = '(3|4|5|6|9)\d{3}\W?\d{4}\W?\d{4}\W?\d{4}';
|
|
PT_RX_EMAIL = '[\w\-]+@(?:(?:[\w\-]{2,}\.)+[a-zA-Z]{2,})';
|
|
//EMAIL = [_a-zA-Z0-9-]+([-+.][_a-zA-Z0-9]+)*[\^ &%*@]{0,2}\@[\^ &%*@]{0,2}[_a-zA-Z0-9]+([-.][_a-zA-Z0-9]+)*\.[-a-zA-Z0-9]+([-.][_a-zA-Z0-9]+)*;
|
|
// PT_RX_URL = '((ftp|https?)://|www)([a-zA-Z_0-9\W]*)[-\w.]+(/([\w/_.]*(\?\S+)?)?)?'; // 이건 매우좋지 않다. x 않좋은 예라 남겨놓음
|
|
// PT_RX_URL = '((?:https?://)?(?:www\.)?[-a-z\d]{1,9}\.[-a-z\d]{2,5}(?:\.[-a-z\d]{2,4})?)'; // 이건 . 으로 구분된건 앵간에서 다 검출한다. 오탐이 많음
|
|
PT_RX_URL = '((ftp|mms|https?)://([-\w\.]+)+(:\d+)?(/([\w/_\.]*(\?\S+)?)?)?)'; // 조건을 ftp, mms, http, https로 시작하는걸로 고정 13_1107 11:26:56 sunk
|
|
|
|
PT_RX_IP = '(((\d{1,2})|(1\d{2})|(2[0-4]\d)|(25[0-5]))\.){3}((\d{1,2})|(1\d{2})|(2[0-4]\d)|(25[0-5]))';
|
|
PT_RX_HP = '(\s|^)(0(1[016789]))(\)|-| )\d{3,4}(-| )\d{4}(?=([^\d-])|$)';
|
|
PT_RX_PHONE = '0(2|([3-6][1-5]))\W?\d{3,4}\W?\d{4}';
|
|
|
|
type
|
|
TTgPcre = class(TTgObject)
|
|
public
|
|
class function GetMatchValues(sSrcText, sMatchText: String;
|
|
var sResult: String;
|
|
bResultClear: Boolean = true;
|
|
aRexOpt: TRegExOptions = [roIgnoreCase, roMultiLine]): Integer;
|
|
end;
|
|
|
|
function GetCountOverlapWordsCount(sText: String): Integer;
|
|
function GetCountOverlapWords(sText: String; sDm: String = ''): String;
|
|
function RemoveOverlapWords(sText: String; sDm: String = ''): String;
|
|
|
|
implementation
|
|
|
|
uses
|
|
Tocsg.Strings, Tocsg.Safe, Tocsg.Exception;
|
|
|
|
function GetCountOverlapWordsCount(sText: String): Integer;
|
|
var
|
|
StrList: TStringList;
|
|
i, nPos, nLen, nHit: Integer;
|
|
sKwd: String;
|
|
begin
|
|
Result := 0;
|
|
Guard(StrList, TStringList.Create);
|
|
if SplitString(sText, ',', StrList) = 0 then
|
|
exit;
|
|
|
|
for i := 0 to StrList.Count - 1 do
|
|
begin
|
|
sKwd := StrList[i];
|
|
nPos := Pos('(x', sKwd);
|
|
if nPos > 0 then
|
|
begin
|
|
nLen := Length(sKwd) - 1;
|
|
SetLength(sKwd, nLen);
|
|
Inc(Result, StrToIntDef(Copy(sKwd, nPos + 2, nLen - nPos + 2), 1));
|
|
SetLength(sKwd, nPos - 1);
|
|
end else
|
|
Inc(Result);
|
|
end;
|
|
end;
|
|
|
|
function GetCountOverlapWords(sText: String; sDm: String = ''): String;
|
|
var
|
|
ComList,
|
|
OverlWordList: TStringList;
|
|
i, nLastIdx,
|
|
nOverlCnt: Integer;
|
|
sCheck: String;
|
|
begin
|
|
Result := '';
|
|
if sText = '' then
|
|
exit;
|
|
Guard(ComList, TStringList.Create);
|
|
Guard(OverlWordList, TStringList.Create);
|
|
SplitString(sText, ',', ComList);
|
|
while ComList.Count > 0 do
|
|
begin
|
|
nLastIdx := ComList.Count - 1;
|
|
sCheck := ComList[nLastIdx];
|
|
ComList.Delete(nLastIdx);
|
|
|
|
nOverlCnt := 0;
|
|
for i := nLastIdx - 1 downto 0 do
|
|
begin
|
|
if sCheck = ComList[i] then
|
|
begin
|
|
Inc(nOverlCnt);
|
|
ComList.Delete(i);
|
|
end;
|
|
end;
|
|
|
|
if nOverlCnt > 0 then
|
|
sCheck := Format('%s(x%d)', [sCheck, nOverlCnt+1]);
|
|
OverlWordList.Add(sCheck);
|
|
end;
|
|
|
|
if (sDm <> '') and (sDm <> ',') then
|
|
begin
|
|
for i := 0 to OverlWordList.Count - 1 do
|
|
SumString(Result, OverlWordList[i], sDm);
|
|
end else
|
|
Result := OverlWordList.CommaText;
|
|
end;
|
|
|
|
function RemoveOverlapWords(sText: String; sDm: String = ''): String;
|
|
var
|
|
ComList,
|
|
OverlWordList: TStringList;
|
|
i, nLastIdx: Integer;
|
|
sCheck: String;
|
|
begin
|
|
Result := '';
|
|
if sText = '' then
|
|
exit;
|
|
Guard(ComList, TStringList.Create);
|
|
Guard(OverlWordList, TStringList.Create);
|
|
SplitString(sText, ',', ComList);
|
|
while ComList.Count > 0 do
|
|
begin
|
|
nLastIdx := ComList.Count - 1;
|
|
sCheck := ComList[nLastIdx];
|
|
ComList.Delete(nLastIdx);
|
|
|
|
for i := nLastIdx - 1 downto 0 do
|
|
begin
|
|
if sCheck = ComList[i] then
|
|
ComList.Delete(i);
|
|
end;
|
|
|
|
OverlWordList.Add(sCheck);
|
|
end;
|
|
|
|
if (sDm <> '') and (sDm <> ',') then
|
|
begin
|
|
for i := 0 to OverlWordList.Count - 1 do
|
|
SumString(Result, OverlWordList[i], sDm);
|
|
end else
|
|
Result := OverlWordList.CommaText;
|
|
end;
|
|
|
|
{ TTgPcre }
|
|
|
|
class function TTgPcre.GetMatchValues(sSrcText, sMatchText: String;
|
|
var sResult: String;
|
|
bResultClear: Boolean = true;
|
|
aRexOpt: TRegExOptions = [roIgnoreCase, roMultiLine]): Integer;
|
|
// aRexOpt: TRegExOptions = [roIgnoreCase]): Integer;
|
|
var
|
|
rx: TRegEx;
|
|
mc: TMatchCollection;
|
|
i: Integer;
|
|
begin
|
|
Result := 0;
|
|
try
|
|
sSrcText := Trim(sSrcText);
|
|
if sSrcText = '' then
|
|
exit;
|
|
|
|
if sSrcText = #13#10 then
|
|
exit;
|
|
|
|
if bResultClear then
|
|
sResult := '';
|
|
|
|
if sMatchText = '' then
|
|
exit;
|
|
|
|
// rx := TRegEx.Create(TRegEx.Escape(sMatchText, true), aRexOpt);
|
|
rx := TRegEx.Create(sMatchText, aRexOpt);
|
|
|
|
mc := rx.Matches(sSrcText);
|
|
Result := mc.Count;
|
|
if Result > 0 then
|
|
begin
|
|
for i := 0 to Result - 1 do
|
|
if mc.Item[i].Value <> '' then
|
|
SumString(sResult, mc.Item[i].Value, ',')
|
|
else
|
|
Dec(Result);
|
|
end;
|
|
except
|
|
on E: Exception do
|
|
ETgException.TraceException(E, 'Fail .. TTgPcre.GetMatchValues()');
|
|
end;
|
|
end;
|
|
|
|
end.
|
|
|