我想用 awk 脚本生成一个 HTML 文件。我的字符串可能包含“<”和“&”这样的字符。有没有一个简短的、经过验证的 awk 函数可以完成转义?
atmip9wb1#
如果只需要做最基本的转义,可以这样做:
# escapeHtml(t) -- return a copy of t that is safe to embed in HTML text or
# in a double-quoted attribute value.
#
# Only the minimal set of characters is escaped:
#     &  ->  &amp;      "  ->  &quot;      <  ->  &lt;      >  ->  &gt;
#
# awk passes scalars by value, so the caller's variable is not modified;
# use the return value.
function escapeHtml(t) {
    # "&" must be handled first: the rules below insert "&" themselves, and
    # escaping it afterwards would turn "&lt;" into "&amp;lt;".
    # In a gsub() replacement a bare "&" means "the matched text", so the
    # literal ampersand is written as "\\&".
    gsub(/&/, "\\&amp;", t)
    gsub(/"/, "\\&quot;", t)
    gsub(/</, "\\&lt;", t)
    gsub(/>/, "\\&gt;", t)
    return t
}
字符串
vwkv1x7d2#
当然。只需对要转换的每一行($0)调用 makeEntities() 即可,或者修改它以接受参数。我写这个函数是为了处理英国国家语料库(BNC),它使用的实体与 HTML 实体高度重叠,但 * 并非 100% * 一致,所以如果您需要某些非英语字符,应当核实对应的实体是否正确。
makeEntities()
$0
# makeEntities() -- replace special and non-ASCII characters in the current
# record ($0) with named entity references, in place.
#
# Takes no arguments and returns nothing; call it once per input record.
#
# NOTE(review): several names (&agr;, &bgr;, &dgr;, &Dgr;, &egr;, &ft;, &ggr;,
# &ins;, &lgr;, &mgr;, &phgr;, &rehy;, &sgr;, &shilling;, &tgr;) are not HTML
# named character references, and plain ASCII such as "-", "/", "_", "#", "+"
# and "@" is rewritten as well. Confirm that the consumer of the output
# understands these names before using this for general HTML.
#
# In a gsub() replacement a bare "&" stands for the matched text, so every
# literal ampersand is written as "\\&".
function makeEntities() {
    # "&" must be escaped before any other rule runs: every rule below
    # inserts "&name;", and escaping afterwards would yield "&amp;name;".
    gsub(/&/, "\\&amp;")

    # The remaining rules are ordered alphabetically by entity name.
    # a
    gsub(/á/, "\\&aacute;"); gsub(/Á/, "\\&Aacute;"); gsub(/ă/, "\\&abreve;")
    gsub(/â/, "\\&acirc;");  gsub(/´/, "\\&acute;");  gsub(/æ/, "\\&aelig;")
    gsub(/Æ/, "\\&AElig;");  gsub(/α/, "\\&agr;");    gsub(/à/, "\\&agrave;")
    gsub(/ā/, "\\&amacr;");  gsub(/Ā/, "\\&Amacr;");  gsub(/ą/, "\\&aogon;")
    gsub(/å/, "\\&aring;");  gsub(/Å/, "\\&Aring;");  gsub(/ã/, "\\&atilde;")
    gsub(/ä/, "\\&auml;");   gsub(/Ä/, "\\&Auml;")

    # b
    gsub(/β/, "\\&bgr;");    gsub(/\\/, "\\&bsol;");  gsub(/•/, "\\&bull;")

    # c
    gsub(/ć/, "\\&cacute;"); gsub(/č/, "\\&ccaron;"); gsub(/Č/, "\\&Ccaron;")
    gsub(/ç/, "\\&ccedil;"); gsub(/Ç/, "\\&Ccedil;"); gsub(/ĉ/, "\\&ccirc;")
    gsub(/✓/, "\\&check;");  gsub(/ˆ/, "\\&circ;");   gsub(/@/, "\\&commat;")
    gsub(/©/, "\\&copy;")

    # d
    gsub(/‐/, "\\&dash;");   gsub(/ď/, "\\&dcaron;"); gsub(/°/, "\\&deg;")
    gsub(/δ/, "\\&dgr;");    gsub(/Δ/, "\\&Dgr;");    gsub(/¨/, "\\&die;")
    gsub(/\$/, "\\&dollar;"); gsub(/đ/, "\\&dstrok;")

    # e
    gsub(/é/, "\\&eacute;"); gsub(/É/, "\\&Eacute;"); gsub(/ě/, "\\&ecaron;")
    gsub(/ê/, "\\&ecirc;");  gsub(/è/, "\\&egrave;"); gsub(/È/, "\\&Egrave;")
    gsub(/ε/, "\\&egr;");    gsub(/ē/, "\\&emacr;");  gsub(/Ē/, "\\&Emacr;")
    gsub(/ę/, "\\&eogon;");  gsub(/ð/, "\\&eth;");    gsub(/ë/, "\\&euml;")
    gsub(/Ë/, "\\&Euml;")

    # f
    gsub(/♭/, "\\&flat;")
    gsub(/½/, "\\&frac12;"); gsub(/⅓/, "\\&frac13;"); gsub(/¼/, "\\&frac14;")
    gsub(/⅕/, "\\&frac15;"); gsub(/⅙/, "\\&frac16;"); gsub(/⅛/, "\\&frac18;")
    gsub(/⅔/, "\\&frac23;"); gsub(/⅖/, "\\&frac25;"); gsub(/¾/, "\\&frac34;")
    gsub(/⅗/, "\\&frac35;"); gsub(/⅜/, "\\&frac38;"); gsub(/⅘/, "\\&frac45;")
    gsub(/⅝/, "\\&frac58;"); gsub(/⅞/, "\\&frac78;"); gsub(/′/, "\\&ft;")

    # g, h
    gsub(/γ/, "\\&ggr;");    gsub(/>/, "\\&gt;")
    # Never matches: every "½" was already rewritten as &frac12; above.
    gsub(/½/, "\\&half;")
    gsub(/ħ/, "\\&hstrok;")

    # i
    gsub(/í/, "\\&iacute;"); gsub(/Í/, "\\&Iacute;"); gsub(/î/, "\\&icirc;")
    gsub(/Î/, "\\&Icirc;");  gsub(/ì/, "\\&igrave;"); gsub(/ī/, "\\&imacr;")
    gsub(/″/, "\\&ins;");    gsub(/¿/, "\\&iquest;"); gsub(/ï/, "\\&iuml;")
    gsub(/Ï/, "\\&Iuml;")

    # l
    gsub(/ĺ/, "\\&lacute;"); gsub(/Ĺ/, "\\&Lacute;"); gsub(/\{/, "\\&lcub;")
    gsub(/≤/, "\\&le;");     gsub(/λ/, "\\&lgr;");    gsub(/_/, "\\&lowbar;")
    gsub(/\[/, "\\&lsqb;");  gsub(/ł/, "\\&lstrok;"); gsub(/Ł/, "\\&Lstrok;")
    gsub(/</, "\\&lt;")

    # m
    gsub(/—/, "\\&mdash;");  gsub(/μ/, "\\&mgr;");    gsub(/µ/, "\\&micro;")
    gsub(/·/, "\\&middot;")

    # n
    gsub(/ń/, "\\&nacute;"); gsub(/ň/, "\\&ncaron;"); gsub(/ņ/, "\\&ncedil;")
    gsub(/–/, "\\&ndash;");  gsub(/ñ/, "\\&ntilde;"); gsub(/Ñ/, "\\&Ntilde;")
    gsub(/#/, "\\&num;")

    # o
    gsub(/ó/, "\\&oacute;"); gsub(/Ó/, "\\&Oacute;"); gsub(/ô/, "\\&ocirc;")
    gsub(/œ/, "\\&oelig;");  gsub(/ò/, "\\&ograve;"); gsub(/Ω/, "\\&ohm;")
    gsub(/ō/, "\\&omacr;");  gsub(/ø/, "\\&oslash;"); gsub(/Ø/, "\\&Oslash;")
    gsub(/õ/, "\\&otilde;"); gsub(/ö/, "\\&ouml;");   gsub(/Ö/, "\\&Ouml;")

    # p
    gsub(/φ/, "\\&phgr;");   gsub(/\+/, "\\&plus;");  gsub(/±/, "\\&plusmn;")
    gsub(/£/, "\\&pound;")

    # r
    gsub(/ŕ/, "\\&racute;"); gsub(/√/, "\\&radic;");  gsub(/ř/, "\\&rcaron;")
    gsub(/Ř/, "\\&Rcaron;"); gsub(/\}/, "\\&rcub;");  gsub(/®/, "\\&reg;")
    gsub(/-/, "\\&rehy;");   gsub(/\]/, "\\&rsqb;")

    # s
    gsub(/ś/, "\\&sacute;"); gsub(/Ś/, "\\&Sacute;"); gsub(/š/, "\\&scaron;")
    gsub(/Š/, "\\&Scaron;"); gsub(/ş/, "\\&scedil;"); gsub(/Ş/, "\\&Scedil;")
    gsub(/ŝ/, "\\&scirc;");  gsub(/σ/, "\\&sgr;");    gsub(/♯/, "\\&sharp;")
    gsub(/\//, "\\&shilling;")
    gsub(/∼/, "\\&sim;")
    # Never matches: every "/" was already rewritten as &shilling; above.
    gsub(/\//, "\\&sol;")
    gsub(/²/, "\\&sup2;");   gsub(/ß/, "\\&szlig;")

    # t
    gsub(/ť/, "\\&tcaron;"); gsub(/ţ/, "\\&tcedil;"); gsub(/τ/, "\\&tgr;")
    gsub(/þ/, "\\&thorn;");  gsub(/Þ/, "\\&THORN;");  gsub(/×/, "\\&times;")
    gsub(/™/, "\\&trade;")

    # u
    gsub(/ú/, "\\&uacute;"); gsub(/Ú/, "\\&Uacute;"); gsub(/û/, "\\&ucirc;")
    gsub(/ù/, "\\&ugrave;"); gsub(/ū/, "\\&umacr;")
    # Never matches: every "¨" was already rewritten as &die; above.
    gsub(/¨/, "\\&uml;")
    gsub(/ů/, "\\&uring;");  gsub(/ü/, "\\&uuml;");   gsub(/Ü/, "\\&Uuml;")

    # v, w, y, z
    gsub(/\|/, "\\&verbar;"); gsub(/ŵ/, "\\&wcirc;")
    gsub(/ý/, "\\&yacute;"); gsub(/ŷ/, "\\&ycirc;");  gsub(/¥/, "\\&yen;")
    gsub(/ÿ/, "\\&yuml;");   gsub(/Ÿ/, "\\&Yuml;")
    gsub(/ź/, "\\&zacute;"); gsub(/Ž/, "\\&Zcaron;"); gsub(/ž/, "\\&zcaron;")
    gsub(/ż/, "\\&zdot;")
}
2条答案
按热度按时间atmip9wb1#
如果只需要做最基本的转义,可以这样做:
字符串
vwkv1x7d2#
当然。只需为您想要转换的每一行调用
makeEntities()
($0
)。或者修改它以接受参数。我这样做是为了使用英国国家语料库,它与HTML实体有高度的重叠,但 * 不是100%*,所以如果您需要一些外来字符,您应该验证它们是否正确。字符串