incubator-ooo-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From h..@apache.org
Subject svn commit: r1177610 - in /incubator/ooo/trunk/main: i18npool/prj/ i18npool/source/search/ postprocess/rebase/ regexp/ scp2/source/ooo/ splitbuild/
Date Fri, 30 Sep 2011 11:29:29 GMT
Author: hdu
Date: Fri Sep 30 11:29:28 2011
New Revision: 1177610

URL: http://svn.apache.org/viewvc?rev=1177610&view=rev
Log:
use ICU regexp instead of LGPL i18nregexp

Removed:
    incubator/ooo/trunk/main/regexp/
Modified:
    incubator/ooo/trunk/main/i18npool/prj/build.lst
    incubator/ooo/trunk/main/i18npool/source/search/makefile.mk
    incubator/ooo/trunk/main/i18npool/source/search/textsearch.cxx
    incubator/ooo/trunk/main/i18npool/source/search/textsearch.hxx
    incubator/ooo/trunk/main/postprocess/rebase/coffbase.txt
    incubator/ooo/trunk/main/scp2/source/ooo/file_library_ooo.scp
    incubator/ooo/trunk/main/scp2/source/ooo/module_hidden_ooo.scp
    incubator/ooo/trunk/main/splitbuild/common.lst

Modified: incubator/ooo/trunk/main/i18npool/prj/build.lst
URL: http://svn.apache.org/viewvc/incubator/ooo/trunk/main/i18npool/prj/build.lst?rev=1177610&r1=1177609&r2=1177610&view=diff
==============================================================================
--- incubator/ooo/trunk/main/i18npool/prj/build.lst (original)
+++ incubator/ooo/trunk/main/i18npool/prj/build.lst Fri Sep 30 11:29:28 2011
@@ -1,4 +1,4 @@
-inp  i18npool    :   bridges sax stoc comphelper ICU:icu i18nutil regexp LIBXSLT:libxslt NULL
+inp  i18npool    :   bridges sax stoc comphelper ICU:icu i18nutil LIBXSLT:libxslt NULL
 inp  i18npool                                   usr1    -   all inp_mkout NULL
 inp  i18npool\inc                               nmake   -   all inp_inc NULL
 inp  i18npool\source\registerservices           nmake   -   all inp_rserv inp_inc NULL

Modified: incubator/ooo/trunk/main/i18npool/source/search/makefile.mk
URL: http://svn.apache.org/viewvc/incubator/ooo/trunk/main/i18npool/source/search/makefile.mk?rev=1177610&r1=1177609&r2=1177610&view=diff
==============================================================================
--- incubator/ooo/trunk/main/i18npool/source/search/makefile.mk (original)
+++ incubator/ooo/trunk/main/i18npool/source/search/makefile.mk Fri Sep 30 11:29:28 2011
@@ -55,17 +55,14 @@ EXCEPTIONSNOOPTFILES= \
 SHL1TARGET= $(TARGET)
 SHL1OBJS=	$(SLOFILES)
 
-.IF ("$(GUI)"=="UNX" || "$(COM)"=="GCC") && "$(GUI)"!="OS2"
-I18NREGEXPLIB=-li18nregexp$(COMID)
-.ELSE
-I18NREGEXPLIB=ii18nregexp.lib
-.ENDIF
-
 SHL1STDLIBS= \
 				$(CPPULIB) \
 				$(CPPUHELPERLIB) \
 				$(SALLIB) \
-				$(I18NREGEXPLIB)
+				$(I18NREGEXPLIB) \
+				$(ICUINLIB) \
+				$(ICUUCLIB)
+
 
 SHL1DEPN=		makefile.mk
 SHL1VERSIONMAP= $(SOLARENV)/src/component.map

Modified: incubator/ooo/trunk/main/i18npool/source/search/textsearch.cxx
URL: http://svn.apache.org/viewvc/incubator/ooo/trunk/main/i18npool/source/search/textsearch.cxx?rev=1177610&r1=1177609&r2=1177610&view=diff
==============================================================================
--- incubator/ooo/trunk/main/i18npool/source/search/textsearch.cxx (original)
+++ incubator/ooo/trunk/main/i18npool/source/search/textsearch.cxx Fri Sep 30 11:29:28 2011
@@ -30,7 +30,6 @@
 
 #include "textsearch.hxx"
 #include "levdis.hxx"
-#include <regexp/reclass.hxx>
 #include <com/sun/star/lang/Locale.hpp>
 #include <com/sun/star/lang/XMultiServiceFactory.hpp>
 #include <comphelper/processfactory.hxx>
@@ -68,11 +67,8 @@ static sal_Int32 COMPLEX_TRANS_MASK_TMP 
     TransliterationModules_ignoreIandEfollowedByYa_ja_JP |
     TransliterationModules_ignoreKiKuFollowedBySa_ja_JP |
     TransliterationModules_ignoreProlongedSoundMark_ja_JP;
-static const sal_Int32 SIMPLE_TRANS_MASK = 0xffffffff ^ COMPLEX_TRANS_MASK_TMP;
-static const sal_Int32 COMPLEX_TRANS_MASK =
-    COMPLEX_TRANS_MASK_TMP |
-    TransliterationModules_IGNORE_KANA |
-    TransliterationModules_IGNORE_WIDTH;
+static const sal_Int32 SIMPLE_TRANS_MASK = ~(COMPLEX_TRANS_MASK_TMP | TransliterationModules_IGNORE_WIDTH) | TransliterationModules_FULLWIDTH_HALFWIDTH;
+static const sal_Int32 COMPLEX_TRANS_MASK = COMPLEX_TRANS_MASK_TMP | TransliterationModules_IGNORE_KANA | TransliterationModules_FULLWIDTH_HALFWIDTH;
     // Above 2 transliteration is simple but need to take effect in
     // complex transliteration
 
@@ -80,7 +76,7 @@ TextSearch::TextSearch(const Reference <
         : xMSF( rxMSF )
         , pJumpTable( 0 )
         , pJumpTable2( 0 )
-        , pRegExp( 0 )
+        , pRegexMatcher( NULL )
         , pWLD( 0 )
 {
     SearchOptions aOpt;
@@ -92,7 +88,7 @@ TextSearch::TextSearch(const Reference <
 
 TextSearch::~TextSearch()
 {
-    delete pRegExp;
+    delete pRegexMatcher;
     delete pWLD;
     delete pJumpTable;
     delete pJumpTable2;
@@ -102,7 +98,7 @@ void TextSearch::setOptions( const Searc
 {
     aSrchPara = rOptions;
 
-    delete pRegExp, pRegExp = 0;
+    delete pRegexMatcher, pRegexMatcher = NULL;
     delete pWLD, pWLD = 0;
     delete pJumpTable, pJumpTable = 0;
     delete pJumpTable2, pJumpTable2 = 0;
@@ -161,13 +157,13 @@ void TextSearch::setOptions( const Searc
 
     sSrchStr = aSrchPara.searchString;
 
-    // use transliteration here, but only if not RegEx, which does it different
-    if ( aSrchPara.algorithmType != SearchAlgorithms_REGEXP && xTranslit.is() &&
+    // use transliteration here
+    if ( xTranslit.is() &&
 	 aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK )
         sSrchStr = xTranslit->transliterateString2String(
                 aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
 
-    if ( aSrchPara.algorithmType != SearchAlgorithms_REGEXP && xTranslit2.is() &&
+    if ( xTranslit2.is() &&
 	 aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK )
 	sSrchStr2 = xTranslit2->transliterateString2String(
 	        aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
@@ -179,17 +175,34 @@ void TextSearch::setOptions( const Searc
     checkCTLEnd = (xBreak.is() && (xBreak->getScriptType(sSrchStr,
                     sSrchStr.getLength()-1) == ScriptType::COMPLEX));
 
-    if ( aSrchPara.algorithmType == SearchAlgorithms_REGEXP )
+    switch( aSrchPara.algorithmType)
     {
-        fnForward = &TextSearch::RESrchFrwrd;
-        fnBackward = &TextSearch::RESrchBkwrd;
+		case SearchAlgorithms_REGEXP:
+			fnForward = &TextSearch::RESrchFrwrd;
+			fnBackward = &TextSearch::RESrchBkwrd;
+
+			{
+			sal_uInt32 nIcuSearchFlags = 0;
+			// map com::sun::star::util::SearchFlags to ICU uregex.h flags
+			// TODO: REG_EXTENDED, REG_NOT_BEGINOFLINE, REG_NOT_ENDOFLINE
+			// REG_NEWLINE is neither defined properly nor used anywhere => not implemented
+			// REG_NOSUB is not used anywhere => not implemented
+			// NORM_WORD_ONLY is only used for SearchAlgorithm==Absolute
+			// LEV_RELAXED is only used for SearchAlgorithm==Approximate
+			// why is even ALL_IGNORE_CASE deprecated in UNO? because of transliteration taking care of it???
+			if( (aSrchPara.searchFlag & com::sun::star::util::SearchFlags::ALL_IGNORE_CASE) != 0)
+				nIcuSearchFlags |= UREGEX_CASE_INSENSITIVE;
+			UErrorCode nIcuErr = U_ZERO_ERROR;
+			// assumption: transliteration doesn't mangle regexp control chars
+			OUString& rPatternStr = (aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK) ? sSrchStr
+					: ((aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK) ? sSrchStr2 : aSrchPara.searchString);
+			const IcuUniString aIcuSearchPatStr( rPatternStr.getStr(), rPatternStr.getLength());
+			pRegexMatcher = new RegexMatcher( aIcuSearchPatStr, nIcuSearchFlags, nIcuErr);
+			if( nIcuErr)
+				{ delete pRegexMatcher; pRegexMatcher = NULL;}
+			} break;
 
-        pRegExp = new Regexpr( aSrchPara, xTranslit );
-    }
-    else 
-    {
-        if ( aSrchPara.algorithmType == SearchAlgorithms_APPROXIMATE )
-        {
+		case SearchAlgorithms_APPROXIMATE:
             fnForward = &TextSearch::ApproxSrchFrwrd;
             fnBackward = &TextSearch::ApproxSrchBkwrd;
 
@@ -198,12 +211,12 @@ void TextSearch::setOptions( const Searc
                     0 != (SearchFlags::LEV_RELAXED & aSrchPara.searchFlag ) );
 
             nLimit = pWLD->GetLimit();
-        }
-        else
-        {
+			break;
+		
+		default:
             fnForward = &TextSearch::NSrchFrwrd;
             fnBackward = &TextSearch::NSrchBkwrd;
-        }
+			break;
     }
 }
 
@@ -400,9 +413,7 @@ SearchResult TextSearch::searchBackward(
     return sres;
 }
 
-
-
-//--------------- die Wort-Trennner ----------------------------------
+//---------------------------------------------------------------------
 
 bool TextSearch::IsDelimiter( const OUString& rStr, sal_Int32 nPos ) const
 {
@@ -430,10 +441,8 @@ bool TextSearch::IsDelimiter( const OUSt
     return bRet;
 }
 
-
-
-// --------- methods for the kind of boyer-morre search ------------------
-
+// --------- helper methods for Boyer-Moore like text searching ----------
+// TODO: use ICU's regex UREGEX_LITERAL mode instead when it becomes available
 
 void TextSearch::MakeForwardTab()
 {
@@ -715,132 +724,73 @@ SearchResult TextSearch::NSrchBkwrd( con
     return aRet;
 }
 
-
-
 //---------------------------------------------------------------------------
-// ------- Methoden fuer die Suche ueber Regular-Expressions --------------
 
 SearchResult TextSearch::RESrchFrwrd( const OUString& searchStr,
                                       sal_Int32 startPos, sal_Int32 endPos )
             throw(RuntimeException)
 {
-    SearchResult aRet;
-    aRet.subRegExpressions = 0;
-    OUString aStr( searchStr );
-
-    bool bSearchInSel = (0 != (( SearchFlags::REG_NOT_BEGINOFLINE |
-                    SearchFlags::REG_NOT_ENDOFLINE ) & aSrchPara.searchFlag ));
-
-    pRegExp->set_line(aStr.getStr(), bSearchInSel ? endPos : aStr.getLength());
-
-    struct re_registers regs;
-
-    // Clear structure
-    memset((void *)&regs, 0, sizeof(struct re_registers));
-    if ( ! pRegExp->re_search(&regs, startPos) )
-    {
-        if( regs.num_of_match > 0 &&
-                (regs.start[0] != -1 && regs.end[0] != -1) )
-        {
-            aRet.startOffset.realloc(regs.num_of_match);
-            aRet.endOffset.realloc(regs.num_of_match);
+	SearchResult aRet;
+	aRet.subRegExpressions = 0;
+	if( !pRegexMatcher)
+		return aRet;
+	
+	if( endPos > searchStr.getLength())
+		endPos = searchStr.getLength();
+
+	// use the ICU RegexMatcher to find the matches
+	UErrorCode nIcuErr = U_ZERO_ERROR;
+	const IcuUniString aSearchTargetStr( searchStr.getStr(), endPos);
+	pRegexMatcher->reset( aSearchTargetStr);
+	if( !pRegexMatcher->find( startPos, nIcuErr))
+		return aRet;
+
+	aRet.subRegExpressions = 1;
+	aRet.startOffset.realloc( aRet.subRegExpressions);
+	aRet.endOffset.realloc( aRet.subRegExpressions);
+	aRet.startOffset[0] = pRegexMatcher->start( nIcuErr);
+	aRet.endOffset[0]   = pRegexMatcher->end( nIcuErr);
 
-            sal_Int32 i = 0, j = 0;
-            while( j < regs.num_of_match )
-            {
-                if( regs.start[j] != -1 && regs.end[j] != -1 )
-                {
-                    aRet.startOffset[i] = regs.start[j];
-                    aRet.endOffset[i] = regs.end[j];
-                    ++i;
-                }
-                ++j;
-            }
-            aRet.subRegExpressions = i;
-        }
-        if ( regs.num_regs > 0 )
-        {
-            if ( regs.start )
-                free(regs.start);
-            if ( regs.end )
-                free(regs.end);
-        }
-    }
-
-    return aRet;
+	return aRet;
 }
 
-/*
- * Sucht das Muster aSrchPara.sSrchStr rueckwaerts im String rStr
- */
 SearchResult TextSearch::RESrchBkwrd( const OUString& searchStr,
                                       sal_Int32 startPos, sal_Int32 endPos )
             throw(RuntimeException)
 {
-    SearchResult aRet;
-    aRet.subRegExpressions = 0;
-    OUString aStr( searchStr );
-
-    sal_Int32 nOffset = 0;
-    sal_Int32 nStrEnde = aStr.getLength() == endPos ? 0 : endPos;
-
-    bool bSearchInSel = (0 != (( SearchFlags::REG_NOT_BEGINOFLINE |
-                    SearchFlags::REG_NOT_ENDOFLINE ) & aSrchPara.searchFlag ));
+	// NOTE: for backwards search callers provide startPos/endPos inverted!
+	SearchResult aRet;
+	aRet.subRegExpressions = 0;
+	if( !pRegexMatcher)
+		return aRet;
+	
+	if( startPos > searchStr.getLength())
+		startPos = searchStr.getLength();
+
+	// use the ICU RegexMatcher to find the matches
+	// TODO: use ICU's backward searching once it becomes available
+	UErrorCode nIcuErr = U_ZERO_ERROR;
+	const IcuUniString aSearchTargetStr( searchStr.getStr(), startPos);
+	pRegexMatcher->reset( aSearchTargetStr);
+	if( !pRegexMatcher->find( endPos, nIcuErr))
+		return aRet;
+
+	aRet.subRegExpressions = 1;
+	aRet.startOffset.realloc( aRet.subRegExpressions);
+	aRet.endOffset.realloc( aRet.subRegExpressions);
+
+	do {
+		// NOTE: backward search seems to be expected to have startOfs/endOfs inverted!
+		aRet.startOffset[0] = pRegexMatcher->end( nIcuErr);
+		aRet.endOffset[0]   = pRegexMatcher->start( nIcuErr);
+	} while( pRegexMatcher->find( aRet.endOffset[0]+1, nIcuErr));
 
-    if( startPos )
-        nOffset = startPos - 1;
-
-    // search only in the subString
-    if( bSearchInSel && nStrEnde )
-    {
-        aStr = aStr.copy( nStrEnde, aStr.getLength() - nStrEnde );
-        if( nOffset > nStrEnde )
-            nOffset = nOffset - nStrEnde;
-        else
-            nOffset = 0;
-    }
-
-    // set the length to negative for reverse search
-    pRegExp->set_line( aStr.getStr(), -(aStr.getLength()) );
-    struct re_registers regs;
-
-    // Clear structure
-    memset((void *)&regs, 0, sizeof(struct re_registers));
-    if ( ! pRegExp->re_search(&regs, nOffset) )
-    {
-        if( regs.num_of_match > 0 &&
-                (regs.start[0] != -1 && regs.end[0] != -1) )
-        {
-            nOffset = bSearchInSel ? nStrEnde : 0;
-            aRet.startOffset.realloc(regs.num_of_match);
-            aRet.endOffset.realloc(regs.num_of_match);
-
-            sal_Int32 i = 0, j = 0;
-            while( j < regs.num_of_match )
-            {
-                if( regs.start[j] != -1 && regs.end[j] != -1 )
-                {
-                    aRet.startOffset[i] = regs.end[j] + nOffset;
-                    aRet.endOffset[i] = regs.start[j] + nOffset;
-                    ++i;
-                }
-                ++j;
-            }
-            aRet.subRegExpressions = i;
-        }
-        if ( regs.num_regs > 0 )
-        {
-            if ( regs.start )
-                free(regs.start);
-            if ( regs.end )
-                free(regs.end);
-        }
-    }
-
-    return aRet;
+	return aRet;
 }
 
-// Phonetische Suche von Worten
+//---------------------------------------------------------------------------
+
+// search for words phonetically
 SearchResult TextSearch::ApproxSrchFrwrd( const OUString& searchStr,
                                           sal_Int32 startPos, sal_Int32 endPos )
             throw(RuntimeException)

Modified: incubator/ooo/trunk/main/i18npool/source/search/textsearch.hxx
URL: http://svn.apache.org/viewvc/incubator/ooo/trunk/main/i18npool/source/search/textsearch.hxx?rev=1177610&r1=1177609&r2=1177610&view=diff
==============================================================================
--- incubator/ooo/trunk/main/i18npool/source/search/textsearch.hxx (original)
+++ incubator/ooo/trunk/main/i18npool/source/search/textsearch.hxx Fri Sep 30 11:29:28 2011
@@ -38,12 +38,15 @@
 
 #include <map>
 
-class Regexpr;
+#include <unicode/regex.h>
+using namespace U_ICU_NAMESPACE;
+typedef U_ICU_NAMESPACE::UnicodeString IcuUniString;
+
 class WLevDistance;
 typedef ::std::map< sal_Unicode, sal_Int32 > TextSearchJumpTable;
 
 //	----------------------------------------------------
-//	class SearchClass
+//	class TextSearch
 //	----------------------------------------------------
 class TextSearch: public cppu::WeakImplHelper2
 <
@@ -93,7 +96,7 @@ class TextSearch: public cppu::WeakImplH
 							throw(::com::sun::star::uno::RuntimeException);
 
 	// Members and methods for the regular expression search
-	Regexpr* pRegExp;
+	RegexMatcher* pRegexMatcher;
 	::com::sun::star::util::SearchResult SAL_CALL
 		RESrchFrwrd( const ::rtl::OUString& searchStr,
 								sal_Int32 startPos, sal_Int32 endPos )
@@ -150,5 +153,4 @@ public:
                 throw( ::com::sun::star::uno::RuntimeException );
 };
 
- 
 #endif

Modified: incubator/ooo/trunk/main/postprocess/rebase/coffbase.txt
URL: http://svn.apache.org/viewvc/incubator/ooo/trunk/main/postprocess/rebase/coffbase.txt?rev=1177610&r1=1177609&r2=1177610&view=diff
==============================================================================
--- incubator/ooo/trunk/main/postprocess/rebase/coffbase.txt (original)
+++ incubator/ooo/trunk/main/postprocess/rebase/coffbase.txt Fri Sep 30 11:29:28 2011
@@ -120,7 +120,6 @@ hwp.dll          0x0000000063740000 0x00
 hyphenmi.dll     0x0000000063710000 0x00020000
 i18nisolang1msc.dll 0x00000000636f0000 0x00010000
 i18npool.uno.dll 0x00000000635b0000 0x00130000
-i18nregexpmsc.dll 0x0000000063590000 0x00010000
 i18nsearch.uno.dll 0x0000000063570000 0x00010000
 i18nutilmsc.dll  0x0000000063540000 0x00020000
 icdmi.dll        0x0000000063520000 0x00010000

Modified: incubator/ooo/trunk/main/scp2/source/ooo/file_library_ooo.scp
URL: http://svn.apache.org/viewvc/incubator/ooo/trunk/main/scp2/source/ooo/file_library_ooo.scp?rev=1177610&r1=1177609&r2=1177610&view=diff
==============================================================================
--- incubator/ooo/trunk/main/scp2/source/ooo/file_library_ooo.scp (original)
+++ incubator/ooo/trunk/main/scp2/source/ooo/file_library_ooo.scp Fri Sep 30 11:29:28 2011
@@ -647,17 +647,6 @@ File gid_File_Lib_I18npool
   #endif
 End
 
-File gid_File_Lib_I18nregexp
-    TXT_FILE_BODY;
-    Styles = (PACKED);
-    Dir = SCP2_OOO_BIN_DIR;
-  #ifdef UNX
-    Name = STRING(CONCAT3(libi18nregexp,COMID,UNXSUFFIX));
-  #else
-    Name = STRING(CONCAT3(i18nregexp,COMID,.dll));
-  #endif
-End
-
 File gid_File_Lib_I18nsearch
     TXT_FILE_BODY;
     Styles = (PACKED);

Modified: incubator/ooo/trunk/main/scp2/source/ooo/module_hidden_ooo.scp
URL: http://svn.apache.org/viewvc/incubator/ooo/trunk/main/scp2/source/ooo/module_hidden_ooo.scp?rev=1177610&r1=1177609&r2=1177610&view=diff
==============================================================================
--- incubator/ooo/trunk/main/scp2/source/ooo/module_hidden_ooo.scp (original)
+++ incubator/ooo/trunk/main/scp2/source/ooo/module_hidden_ooo.scp Fri Sep 30 11:29:28 2011
@@ -319,7 +319,6 @@ Module gid_Module_Root_Files_5
 	gid_File_Lib_Dbodbcbase,
 	gid_File_Lib_I18npool,
 	gid_File_Lib_I18npaper,
-	gid_File_Lib_I18nregexp,
 	gid_File_Lib_I18nsearch,
     gid_File_Lib_I18nisolang,
 	gid_File_Lib_I18nutil,

Modified: incubator/ooo/trunk/main/splitbuild/common.lst
URL: http://svn.apache.org/viewvc/incubator/ooo/trunk/main/splitbuild/common.lst?rev=1177610&r1=1177609&r2=1177610&view=diff
==============================================================================
--- incubator/ooo/trunk/main/splitbuild/common.lst (original)
+++ incubator/ooo/trunk/main/splitbuild/common.lst Fri Sep 30 11:29:28 2011
@@ -1 +1 @@
-basebmp basegfx bean comphelper configmgr connectivity embeddedobj embedserv eventattacher fileaccess i18npool i18nutil linguistic lingucomponent o3tl officecfg oovbaapi package pyuno regexp rsc sax shell sot svl tools transex3 ucb ucbhelper unotools unoxml vos xmlhelp xmloff xmlscript wizards
+basebmp basegfx bean comphelper configmgr connectivity embeddedobj embedserv eventattacher fileaccess i18npool i18nutil linguistic lingucomponent o3tl officecfg oovbaapi package pyuno rsc sax shell sot svl tools transex3 ucb ucbhelper unotools unoxml vos xmlhelp xmloff xmlscript wizards



Mime
View raw message