From 22d232030a457fa4b35f63fa2711fd045b9e338e Mon Sep 17 00:00:00 2001
From: xmcp
Date: Tue, 2 Apr 2019 21:10:24 +0800
Subject: [PATCH] fix text_splitter regexp

---
 src/text_splitter.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/text_splitter.js b/src/text_splitter.js
index 93e817a..8bfe786 100644
--- a/src/text_splitter.js
+++ b/src/text_splitter.js
@@ -1,6 +1,6 @@
-export const PID_RE=/(^|[^\d])([1-9]\d{4,5})(?!\d|\u20e3)/g;
+export const PID_RE=/(^|[^\d])([1-9]\d{4,5})(?!\d|\u20e3|\ufe0e|\ufe0f)/g;
 export const NICKNAME_RE=/(^|[^A-Za-z])((?:(?:Angry|Baby|Crazy|Diligent|Excited|Fat|Greedy|Hungry|Interesting|Japanese|Kind|Little|Magic|Naïve|Old|Powerful|Quiet|Rich|Superman|THU|Undefined|Valuable|Wifeless|Xiangbuchulai|Young|Zombie)\s)?(?:Alice|Bob|Carol|Dave|Eve|Francis|Grace|Hans|Isabella|Jason|Kate|Louis|Margaret|Nathan|Olivia|Paul|Queen|Richard|Susan|Thomas|Uma|Vivian|Winnie|Xander|Yasmine|Zach)|You Win(?: \d+)?|洞主)(?![A-Za-z])/gi;
-export const URL_RE=/(?:^|\b)((?:https?:\/\/)?(?:[\w-]+\.)+[a-zA-Z]{2,3}(?::\d{1,5})?(?:\/[\w~!@#$%^&*()-_=+[\];,./?]*)?)/gi;
+export const URL_RE=/(?:^|\b)((?:https?:\/\/)?(?:(?:[\w-]+\.)+[a-zA-Z]{2,3}|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})(?::\d{1,5})?(?:\/[\w~!@#$%^&*()-_=+[\];,./?]*)?)(?![a-zA-Z0-9])/gi;
 
 export function split_text(txt,rules) {
     // rules: [['name',/regex/],...]