使用方法和 truncatewords 一样,
{{ value|truncatehanzi:6 }}
如果 value 为“截取English、Γρεεκ等字母语言和CJK汉字。”,输出“截取English、Γρεεκ等字...”。代码分为两个部分,截取汉字的脚本 truncate_hanzi.py 和生成过滤器的脚本 filters.py。
truncate_hanzi.py
# -*- coding: UTF-8 -*-
import re
from django.utils.encoding import force_unicode
from django.utils.functional import allow_lazy
def truncate_hanzi(s, num):
s = force_unicode(s)
length = int(num)
if length <= 0:
return u'...'
# Set up regex for alphanumeric characters
# \u00c0-\u02af: Latin
# \u0370-\u1fff: Greek and alphabet characters in other language
re_alnum = re.compile(ur'[a-zA-Z0-9_\-\u00c0-\u02af\u0370-\u1fff]', re.U)
# Set up regex for hanzi
# \u3040-\ufaff: CJK characters
re_hanzi = re.compile(ur'[\u3040-\ufaff]', re.U)
hanzi = u''
hanzi_len = 0
word_temp = u''
for char in s:
# Check for alphabet characters
if re_alnum.match(char):
word_temp += char
continue
if word_temp:
hanzi += word_temp
hanzi_len += 1
word_temp = ''
# Check for length
if hanzi_len >= length:
if not hanzi.endswith('...'):
hanzi += '...'
break
# Check for hanzi
if re_hanzi.match(char):
hanzi_len += 1
hanzi += char
hanzi += word_temp
return hanzi
truncate_hanzi = allow_lazy(truncate_hanzi, unicode)
def demo():
print truncate_hanzi('截取段落工具,支持English、Γρεεκ等字母语言和CJK汉字。', 6)
print truncate_hanzi('截取段落工具,支持English、Γρεεκ等字母语言和CJK汉字。', 11)
print truncate_hanzi('截取段落工具,支持English、Γρεεκ等字母语言和CJK汉字。', 20)
if __name__ == '__main__':
demo()
filters.py
# -*- coding: UTF-8 -*-
from django.template import Library
from django.template.defaultfilters import stringfilter
register = Library()
@stringfilter
def truncatehanzi(value, arg):
"""
Truncates a string after a certain number of words including
alphanumeric and CJK characters.
Argument: Number of words to truncate after.
"""
from truncate_hanzi import truncate_hanzi
try:
length = int(arg)
except ValueError: # Invalid literal for int().
return value # Fail silently.
return truncate_hanzi(value, length)
truncatehanzi.is_safe = True
register.filter('truncatehanzi', truncatehanzi)


