<?php
# Escape every blacklist word so regex metacharacters in it are matched literally.
# Empty entries are dropped: an empty alternative would let the pattern match the
# empty string at every word boundary and hand the callback an empty capture.
$words = array_filter(
    array_map(function ($word) {
        return preg_quote($word, '/');
    }, $words),
    function ($word) {
        return $word !== '';
    }
);
# Collapse the escaped words into a single alternation, e.g. "fook|shoot".
$words = implode('|', $words);
# With nothing to search for, $string is left untouched instead of running
# a pattern with an empty capture group.
if ($words !== '') {
    # One preg_replace_callback() pass masks every whole-word, case-insensitive match.
    $masked = preg_replace_callback('/\b(' . $words . ')\b/i', 'my_beloved_callback', $string);
    # preg_replace_callback() returns null on a PCRE error; keep the original
    # text rather than overwriting $string with null.
    if ($masked !== null) {
        $string = $masked;
    }
}
/**
 * Masks a matched word: keeps its first character and replaces every
 * remaining character with an asterisk.
 *
 * Written as a preg_replace_callback() callback for a pattern that captures
 * the word to mask in group 1.
 *
 * @param array $m Match array; $m[1] holds the captured word.
 * @return string The masked word, e.g. "shoot" becomes "s****".
 */
function my_beloved_callback(array $m): string
{
    $word = $m[1];
    // Nothing to mask: avoids reading offset 0 of an empty string and
    // passing a negative count to str_repeat().
    if ($word === '') {
        return '';
    }
    // Work in characters, not bytes, so a multibyte first letter stays intact
    // and exactly one asterisk is written per remaining character.
    $firstChar = mb_substr($word, 0, 1);
    $hiddenCount = mb_strlen($word) - 1;
    return $firstChar . str_repeat('*', $hiddenCount);
}
/ #start of pattern delimiter
(?: #non-capturing group to encapsulate logic
\b #position separating word character and non-word character
(?= #start lookahead -- to match without consuming letters
(?:fook|shoot) #OR-delimited bad words
\b #position separating word character and non-word character
) #end lookahead
\w #first word character of bad word
\K #forget first matched word character
| #OR -- to set up \G technique
\G(?!^) #continue matching from previous match but not from the start of the string
) #end of non-capturing group
\w #match non-first letter of bad word
/ #ending pattern delimiter
i #make pattern case-insensitive
function lowercase_except_first_letter($s) {
    // \B\w matches a word character that does NOT sit at a word boundary,
    // i.e. every character of a word except its first one. The first character
    // is therefore never handed to the callback, even when the word directly
    // follows a quote or a bracket.
    // The original author notes '/(?<!^|\s|\W)(\w)/u' as an alternative pattern.
    $nonInitialWordChar = '/(\B\w)/u';

    // Lower-cases the single captured character.
    $toLower = static function ($match) {
        return mb_strtolower($match[1]);
    };

    return preg_replace_callback($nonInitialWordChar, $toLower, $s);
}
5条答案
按热度按时间dpiehjr41#
chhqkbe12#
这可以通过许多方式来完成,使用非常奇怪的自动生成的regexp...但我相信使用 `preg_replace_callback()` 最终会更*健壮*。
31moq8wy3#
snvhrwxg4#
假设要屏蔽的坏单词黑名单完全由字母或至少由单词字符(允许数字和下划线)组成,则在用 `implode()` 拼接并插入正则表达式模式之前不需要调用 `preg_quote()`。在匹配到符合条件的单词的第一个字母后,使用 `\G` 元字符继续匹配。坏单词中随后匹配到的每个字母都将被星号一对一替换。`\K` 用于忘记/释放坏单词的第一个字母。这种方法不需要调用 `preg_replace_callback()` 来测量每个匹配的字符串,再在文本块中每个匹配到的坏单词的第一个字母后写入N个星号。细分:
代码:(Demo)
xhv8bpkk5#
这里是PHP的Unicode友好的正则表达式。这个函数本身有不同的用途,但你可以从这里使用的正则表达式中得到一个想法。