# Split ``subject`` on every comma that is NOT inside a flat ``[...]`` list.
# The negative lookahead rejects a comma when the text after it consists of
# one or more comma-separated, bracket-free items followed by a closing "]".
# NOTE(review): ``subject`` (and the ``re`` import) are expected to be
# defined by the surrounding code -- confirm.
# ``maxsplit`` is passed by keyword: passing it positionally is deprecated
# since Python 3.13. A value of 0 means "no limit".
result = re.split(r",(?!(?:[^,\[\]]+,)*[^,\[\]]+])", subject, maxsplit=0)
, # Match the character “,” literally
(?! # Assert that it is impossible to match the regex below starting at this position (negative lookahead)
(?: # Match the regular expression below
[^,\[\]] # Match any single character NOT present in the list below
# The literal character “,”
# The literal character “[”
# The literal character “]”
+ # Between one and unlimited times, as many times as possible, giving back as needed (greedy)
, # Match the character “,” literally
)
* # Between zero and unlimited times, as many times as possible, giving back as needed (greedy)
[^,\[\]] # Match any single character NOT present in the list below
# The literal character “,”
# The literal character “[”
# The literal character “]”
+ # Between one and unlimited times, as many times as possible, giving back as needed (greedy)
] # Match the character “]” literally
)
# Sample input: comma-separated ``key:value`` pairs where a value may be a
# bracketed list that itself contains commas.
s = "year:2020,concepts:[ab553,cd779],publisher:elsevier"


def split_by_commas(s):
    """Split ``s`` on commas that are not enclosed in square brackets.

    Brackets are tracked with a nesting counter, so commas inside nested
    lists such as ``a:[[1,2],3]`` are kept as part of the value instead of
    being treated as separators.

    Args:
        s: The string to split.

    Returns:
        A list of the top-level comma-separated pieces. Brackets are kept in
        the pieces; an empty input yields ``[""]``.
    """
    parts = []
    depth = 0  # number of currently open "[" brackets
    start = 0  # index where the piece being collected begins
    for index, char in enumerate(s):
        if char == "[":
            depth += 1
        elif char == "]":
            # Ignore a stray "]" so unbalanced input cannot drive the
            # counter negative and hide later top-level commas.
            if depth:
                depth -= 1
        elif char == "," and not depth:
            parts.append(s[start:index])
            start = index + 1
    # Whatever follows the last top-level comma is the final piece.
    parts.append(s[start:])
    return parts


main_lst = split_by_commas(s)
print(main_lst)
def split_by_commas(s):
    """Break ``s`` apart at every comma found outside square brackets.

    A counter records how many ``[`` are currently open; a ``]`` closes one
    only if any are open. Commas seen while the counter is non-zero stay in
    the current piece. Brackets themselves are kept in the output.

    Returns the list of pieces; the text after the last separator (possibly
    empty) is always included as the final element.
    """
    pieces = []
    buffer = []  # characters of the piece being collected
    open_count = 0
    for ch in s:
        if ch == "," and open_count == 0:
            # Top-level separator: finish the current piece, drop the comma.
            pieces.append("".join(buffer))
            buffer = []
            continue
        if ch == "[":
            open_count += 1
        elif ch == "]" and open_count > 0:
            open_count -= 1
        buffer.append(ch)
    pieces.append("".join(buffer))
    return pieces
5条答案
按热度按时间qq24tv8q1#
更新以支持括号中的2个以上项目。
yv5phkfx2#
这个正则表达式适用于你的例子:
在这里,我们使用正向前瞻查找逗号,然后查找非逗号和冒号字符,最后查找冒号。这将正确地找到您要搜索的
<comma><key>
模式。当然,如果允许键使用逗号,则需要进一步调整。您可以查看regexr here
hrirmatl3#
您可以使用用户定义的函数而不是split来解决这个问题:
运行上述代码的结果:
t3psigkw4#
如果使用只带前瞻(lookahead)的模式来断言右侧的字符,则不会断言左侧是否存在与之配对的字符。
除了使用拆分,您还可以匹配方括号中的值的一个或多个重复,或者匹配除逗号之外的任何字符。
Regex demo
产出
wmtdaxz35#
我采用了@Bemwa的解决方案(不适用于我的用例)