regex 创建一个VBA,它将查找所有文本引用并导出到另一个文档

m4pnthwp  于 2023-01-14  发布在  其他
关注(0)|答案(1)|浏览(129)

我正在尝试创建一个VBA宏,将确定在文本引用和出口到另一个页面。到目前为止,我已经取得了一些适度的成功,但只能让它与一个表达式,找到一些引用。我需要它来确定所有的引用,无论是那些在括号内和他们。例如在文本引用
一位作者(Smith,2015)......Smith(2015)认为......
两位作者(Smith和Jones,2015)...根据Smith和Jones(2015)...
三位作者(Smith、Jones和Brown,2015)...... Smith、Jones和Brown(2015)的研究表明......
4位或更多作者(Smith等人,2015)Smith等人(2015)证明......
目前我有这个代码:

Sub ExtractRefsFromSelection()
    MsgBox ("This macro extracts references from selected text.")
    Dim SearchRange As Range, DestinationDoc$, SourceDoc$
    DestinationDoc$ = "Refs.doc"
    SourceDoc$ = ActiveDocument.Name
    Documents.Add DocumentType:=wdNewBlankDocument
    ActiveDocument.SaveAs DestinationDoc$, wdFormatDocument
    Documents(SourceDoc$).Activate
    Set SearchRange = ActiveDocument.Range
    With SearchRange.Find
        .ClearFormatting
        .Text = "\([!\)]@[0-9]{4}\)"
        .Replacement.Text = ""
        .Forward = True
        .Wrap = wdFindStop
        .Format = False
        .MatchCase = False
        .MatchAllWordForms = False
        .MatchSoundsLike = False
        .MatchWildcards = True
        While .Execute
            Documents(DestinationDoc$).Range.Text = Documents(DestinationDoc$).Range.Text + SearchRange.Text
        Wend
    End With
End Sub
xzabzqsa

xzabzqsa1#

使用正则表达式库--〉将“Microsoft VBScript Regular Expressions 5.5”添加到您的项目中会更容易、更高效。然后您可以使用以下代码:

Option Explicit

'this is the complete pattern
Private Const pattern As String = "(\([. \,\w-]+ \d{4}\)|[\w-]+ \(\d{4}\)|[\w-]+ and [\w-]+ \(\d{4}\)|[\w-]+, [\w-]+ and [\w-]+ \(\d{4}\)|[\w-]+ et al. \(\d{4}\))"

'\w only returns characters until ASCII 128 - therefore we have to extend this
Private Const specialCharacters As String = "\A-Za-z0-9\u00C0-\u00FF\u03B1-\u03C9"  'to replace \w

'just in case - if only single patterns should be searched
'and to understand the single parts
Private Const patternWithinBrackets As String = "\([. \,\w-]+ \d{4}\)"   '(Smith and Jones, 2015)
Private Const patternOneAuthor As String = "[\w-]+ \(\d{4}\)"   'Smith (2015)
Private Const patternTwoAuthors As String = "[\w-]+ and [\w-]+ \(\d{4}\)"  'Smith-Meier and Jones (2015)
Private Const patternThreeAuthors As String = "[\w-]+, [\w-]+ and [\w-]+ \(\d{4}\)"  'Smith, Müller and Brown (2015)
Private Const patternMoreAuthors As String = "[\w-]+ et al. \(\d{4}\)"  'Smith et al. (2015)

Sub printAllCitations()
Dim text As String
text = Selection.text
'text = ThisDocument.Content    'reads the whole document (w/o header/footer etc.)
Debug.Print regEx_Find(text, patternAllCitations)
' Documents(DestinationDoc$).Range.Text = regEx_Find(text, patternAllCitations)
End Sub

Public Function regEx_Find(strText As String, pattern As String) As String
'returns a string with all matches concatenated by vbcrlf 

Dim regEx As RegExp: Set regEx = New RegExp
Dim result As String
With regEx
    .pattern = Replace(pattern, "\w", specialCharacters)  'this is the place where we add special characters to the pattern
    .Global = True
    .IgnoreCase = False
    If .Test(strText) = True Then
        Dim matches As MatchCollection
        Dim m As Match
        
        Set matches = .Execute(strText)
        For Each m In matches
            result = result & vbCrLf & m.Value
        Next
    End If
End With
regEx_Find = result
End Function

相关问题