BSOne.SFC/Tocsg.Lib/VCL/Tocsg.PCRE.pas

202 lines
5.9 KiB
Plaintext

{*******************************************************}
{ }
{ Tocsg.PCRE }
{ }
{ Copyright (C) 2022 sunk }
{ }
{*******************************************************}
unit Tocsg.PCRE;
interface
uses
Tocsg.Obj, Classes, SysUtils,
WinApi.Windows, System.RegularExpressions;
const
PT_RX_IDNUM = '\d{2}(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])\W?[1-4]\d{6}'; // or \d{2}(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])-[1-4]\d{7}
//3 : JCB나 다이너스, 4 : 비자, 5 : 마스터, 6 : 중국의 은련 카드, 9 : 세계 공통 국내 전용카드
PT_RX_CARDNUM = '(3|4|5|6|9)\d{3}\W?\d{4}\W?\d{4}\W?\d{4}';
PT_RX_EMAIL = '[\w\-]+@(?:(?:[\w\-]{2,}\.)+[a-zA-Z]{2,})';
//EMAIL = [_a-zA-Z0-9-]+([-+.][_a-zA-Z0-9]+)*[\^ &%*@]{0,2}\@[\^ &%*@]{0,2}[_a-zA-Z0-9]+([-.][_a-zA-Z0-9]+)*\.[-a-zA-Z0-9]+([-.][_a-zA-Z0-9]+)*;
// PT_RX_URL = '((ftp|https?)://|www)([a-zA-Z_0-9\W]*)[-\w.]+(/([\w/_.]*(\?\S+)?)?)?'; // 이건 매우좋지 않다. x 않좋은 예라 남겨놓음
// PT_RX_URL = '((?:https?://)?(?:www\.)?[-a-z\d]{1,9}\.[-a-z\d]{2,5}(?:\.[-a-z\d]{2,4})?)'; // 이건 . 으로 구분된건 앵간에서 다 검출한다. 오탐이 많음
PT_RX_URL = '((ftp|mms|https?)://([-\w\.]+)+(:\d+)?(/([\w/_\.]*(\?\S+)?)?)?)'; // 조건을 ftp, mms, http, https로 시작하는걸로 고정 13_1107 11:26:56 sunk
PT_RX_IP = '(((\d{1,2})|(1\d{2})|(2[0-4]\d)|(25[0-5]))\.){3}((\d{1,2})|(1\d{2})|(2[0-4]\d)|(25[0-5]))';
PT_RX_HP = '(\s|^)(0(1[016789]))(\)|-| )\d{3,4}(-| )\d{4}(?=([^\d-])|$)';
PT_RX_PHONE = '0(2|([3-6][1-5]))\W?\d{3,4}\W?\d{4}';
type
TTgPcre = class(TTgObject)
public
class function GetMatchValues(sSrcText, sMatchText: String;
var sResult: String;
bResultClear: Boolean = true;
aRexOpt: TRegExOptions = [roIgnoreCase, roMultiLine]): Integer;
end;
function GetCountOverlapWordsCount(sText: String): Integer;
function GetCountOverlapWords(sText: String; sDm: String = ''): String;
function RemoveOverlapWords(sText: String; sDm: String = ''): String;
implementation
uses
Tocsg.Strings, Tocsg.Safe, Tocsg.Exception;
function GetCountOverlapWordsCount(sText: String): Integer;
var
StrList: TStringList;
i, nPos, nLen, nHit: Integer;
sKwd: String;
begin
Result := 0;
Guard(StrList, TStringList.Create);
if SplitString(sText, ',', StrList) = 0 then
exit;
for i := 0 to StrList.Count - 1 do
begin
sKwd := StrList[i];
nPos := Pos('(x', sKwd);
if nPos > 0 then
begin
nLen := Length(sKwd) - 1;
SetLength(sKwd, nLen);
Inc(Result, StrToIntDef(Copy(sKwd, nPos + 2, nLen - nPos + 2), 1));
SetLength(sKwd, nPos - 1);
end else
Inc(Result);
end;
end;
function GetCountOverlapWords(sText: String; sDm: String = ''): String;
var
ComList,
OverlWordList: TStringList;
i, nLastIdx,
nOverlCnt: Integer;
sCheck: String;
begin
Result := '';
if sText = '' then
exit;
Guard(ComList, TStringList.Create);
Guard(OverlWordList, TStringList.Create);
SplitString(sText, ',', ComList);
while ComList.Count > 0 do
begin
nLastIdx := ComList.Count - 1;
sCheck := ComList[nLastIdx];
ComList.Delete(nLastIdx);
nOverlCnt := 0;
for i := nLastIdx - 1 downto 0 do
begin
if sCheck = ComList[i] then
begin
Inc(nOverlCnt);
ComList.Delete(i);
end;
end;
if nOverlCnt > 0 then
sCheck := Format('%s(x%d)', [sCheck, nOverlCnt+1]);
OverlWordList.Add(sCheck);
end;
if (sDm <> '') and (sDm <> ',') then
begin
for i := 0 to OverlWordList.Count - 1 do
SumString(Result, OverlWordList[i], sDm);
end else
Result := OverlWordList.CommaText;
end;
function RemoveOverlapWords(sText: String; sDm: String = ''): String;
var
ComList,
OverlWordList: TStringList;
i, nLastIdx: Integer;
sCheck: String;
begin
Result := '';
if sText = '' then
exit;
Guard(ComList, TStringList.Create);
Guard(OverlWordList, TStringList.Create);
SplitString(sText, ',', ComList);
while ComList.Count > 0 do
begin
nLastIdx := ComList.Count - 1;
sCheck := ComList[nLastIdx];
ComList.Delete(nLastIdx);
for i := nLastIdx - 1 downto 0 do
begin
if sCheck = ComList[i] then
ComList.Delete(i);
end;
OverlWordList.Add(sCheck);
end;
if (sDm <> '') and (sDm <> ',') then
begin
for i := 0 to OverlWordList.Count - 1 do
SumString(Result, OverlWordList[i], sDm);
end else
Result := OverlWordList.CommaText;
end;
{ TTgPcre }
class function TTgPcre.GetMatchValues(sSrcText, sMatchText: String;
var sResult: String;
bResultClear: Boolean = true;
aRexOpt: TRegExOptions = [roIgnoreCase, roMultiLine]): Integer;
// aRexOpt: TRegExOptions = [roIgnoreCase]): Integer;
var
rx: TRegEx;
mc: TMatchCollection;
i: Integer;
begin
Result := 0;
try
sSrcText := Trim(sSrcText);
if sSrcText = '' then
exit;
if sSrcText = #13#10 then
exit;
if bResultClear then
sResult := '';
if sMatchText = '' then
exit;
// rx := TRegEx.Create(TRegEx.Escape(sMatchText, true), aRexOpt);
rx := TRegEx.Create(sMatchText, aRexOpt);
mc := rx.Matches(sSrcText);
Result := mc.Count;
if Result > 0 then
begin
for i := 0 to Result - 1 do
if mc.Item[i].Value <> '' then
SumString(sResult, mc.Item[i].Value, ',')
else
Dec(Result);
end;
except
on E: Exception do
ETgException.TraceException(E, 'Fail .. TTgPcre.GetMatchValues()');
end;
end;
end.