import sys
import typing


# ---------------------------------------------------------------------------
# atcoder/string.py -- suffix array, LCP array and Z-algorithm
# ---------------------------------------------------------------------------


def _sa_naive(s: typing.List[int]) -> typing.List[int]:
    '''
    Build the suffix array of s by sorting the suffixes directly.

    Every comparison may copy and scan a whole suffix, so this is only
    meant for the very short inputs _sa_is() dispatches here.
    '''

    return sorted(range(len(s)), key=lambda i: s[i:])


def _sa_doubling(s: typing.List[int]) -> typing.List[int]:
    '''
    Build the suffix array of s by prefix doubling.

    In each round the suffixes are sorted by their first 2 * k elements,
    using the ranks of the first k elements computed in the previous round.
    Positions past the end of s get rank -1, so a shorter suffix sorts
    before any longer suffix it is a prefix of (s is expected to hold
    non-negative values).
    '''

    n = len(s)
    sa = list(range(n))
    rnk = list(s)  # flat list of ints: a shallow copy is enough
    tmp = [0] * n
    k = 1

    def rank_pair(i: int) -> typing.Tuple[int, int]:
        # Reads the *current* rnk and k of the enclosing scope on each call.
        return rnk[i], (rnk[i + k] if i + k < n else -1)

    while k < n:
        sa.sort(key=rank_pair)
        tmp[sa[0]] = 0
        for prev, cur in zip(sa, sa[1:]):
            # Neighbours with different keys start a new rank class.
            step = 1 if rank_pair(prev) != rank_pair(cur) else 0
            tmp[cur] = tmp[prev] + step
        tmp, rnk = rnk, tmp
        k *= 2
    return sa


def _sa_is(s: typing.List[int], upper: int) -> typing.List[int]:
    '''
    SA-IS, linear-time suffix array construction

    s must only contain values in [0, upper]; the bucket arrays are sized
    from upper. Inputs shorter than the thresholds below are delegated to
    _sa_naive() / _sa_doubling().

    Reference:
    G. Nong, S. Zhang, and W. H. Chan,
    Two Efficient Algorithms for Linear Time Suffix Array Construction
    '''

    threshold_naive = 10
    threshold_doubling = 40

    n = len(s)

    if n == 0:
        return []
    if n == 1:
        return [0]
    if n == 2:
        return [0, 1] if s[0] < s[1] else [1, 0]

    if n < threshold_naive:
        return _sa_naive(s)
    if n < threshold_doubling:
        return _sa_doubling(s)

    # ls[i] is True iff suffix i is S-type (smaller than suffix i + 1).
    # The last suffix is L-type by definition.
    ls = [False] * n
    for i in range(n - 2, -1, -1):
        if s[i] == s[i + 1]:
            ls[i] = ls[i + 1]
        else:
            ls[i] = s[i] < s[i + 1]

    # sum_l[c] / sum_s[c]: start of the L-type / S-type part of bucket c.
    sum_l = [0] * (upper + 1)
    sum_s = [0] * (upper + 1)
    for i in range(n):
        if not ls[i]:
            sum_s[s[i]] += 1
        else:
            sum_l[s[i] + 1] += 1
    for i in range(upper + 1):
        sum_s[i] += sum_l[i]
        if i < upper:
            sum_l[i + 1] += sum_s[i]

    def induce(lms: typing.List[int]) -> typing.List[int]:
        # Induced sort: place the given LMS suffixes, then derive the
        # L-type suffixes left to right and the S-type ones right to left.
        res = [-1] * n

        buf = sum_s[:]
        for d in lms:
            if d == n:
                continue
            res[buf[s[d]]] = d
            buf[s[d]] += 1

        buf = sum_l[:]
        res[buf[s[n - 1]]] = n - 1
        buf[s[n - 1]] += 1
        # res is filled while it is scanned, so an index loop is required.
        for i in range(n):
            v = res[i]
            if v >= 1 and not ls[v - 1]:
                res[buf[s[v - 1]]] = v - 1
                buf[s[v - 1]] += 1

        buf = sum_l[:]
        for i in range(n - 1, -1, -1):
            v = res[i]
            if v >= 1 and ls[v - 1]:
                buf[s[v - 1] + 1] -= 1
                res[buf[s[v - 1] + 1]] = v - 1

        return res

    # lms_map[i]: index of i among the LMS positions, or -1.
    # It has n + 1 entries so that lms_map[-1] (an unfilled slot) is -1.
    lms_map = [-1] * (n + 1)
    lms = []
    for i in range(1, n):
        if not ls[i - 1] and ls[i]:
            lms_map[i] = len(lms)
            lms.append(i)
    m = len(lms)

    sa = induce(lms)

    if m:
        sorted_lms = [v for v in sa if lms_map[v] != -1]

        # Name the LMS substrings: equal substrings share a name.
        rec_s = [0] * m
        rec_upper = 0
        rec_s[lms_map[sorted_lms[0]]] = 0
        for i in range(1, m):
            left = sorted_lms[i - 1]
            right = sorted_lms[i]
            if lms_map[left] + 1 < m:
                end_l = lms[lms_map[left] + 1]
            else:
                end_l = n
            if lms_map[right] + 1 < m:
                end_r = lms[lms_map[right] + 1]
            else:
                end_r = n

            same = True
            if end_l - left != end_r - right:
                same = False
            else:
                while left < end_l:
                    if s[left] != s[right]:
                        break
                    left += 1
                    right += 1
                if left == n or s[left] != s[right]:
                    same = False

            if not same:
                rec_upper += 1
            rec_s[lms_map[sorted_lms[i]]] = rec_upper

        # Sort the LMS suffixes by recursing on the reduced string.
        rec_sa = _sa_is(rec_s, rec_upper)

        for i in range(m):
            sorted_lms[i] = lms[rec_sa[i]]
        sa = induce(sorted_lms)

    return sa


def _compress(s: typing.Sequence[int]) -> typing.Tuple[typing.List[int], int]:
    '''
    Map the values of s to 0, 1, 2, ... keeping their relative order.

    Returns the compressed list and its largest value (0 for empty input).
    '''

    rank = {v: i for i, v in enumerate(sorted(set(s)))}
    return [rank[v] for v in s], max(len(rank) - 1, 0)


def suffix_array(s: typing.Union[str, typing.List[int]],
                 upper: typing.Optional[int] = None) -> typing.List[int]:
    '''
    Return the suffix array of s: the start indices of all suffixes of s
    in ascending lexicographic order.

    s is either a str or a list of ints.

    - str: characters are compared by code point. Strings containing code
      points above 255 are coordinate-compressed first, so any Unicode
      string is accepted.
    - list without upper: arbitrary ints; values are coordinate-compressed.
    - list with upper: every element must satisfy 0 <= x <= upper
      (checked with assert); the values are used as bucket indices as-is.
    '''

    if isinstance(s, str):
        codes = [ord(c) for c in s]
        if not codes or max(codes) <= 255:
            return _sa_is(codes, 255)
        # Code points above 255 would overflow the fixed-size buckets.
        return _sa_is(*_compress(codes))

    if upper is None:
        return _sa_is(*_compress(s))

    assert 0 <= upper
    for d in s:
        assert 0 <= d <= upper

    return _sa_is(s, upper)


def lcp_array(s: typing.Union[str, typing.List[int]],
              sa: typing.List[int]) -> typing.List[int]:
    '''
    Return the LCP array of s, given its suffix array sa.

    The result has len(s) - 1 entries; entry i is the length of the longest
    common prefix of the suffixes starting at sa[i] and sa[i + 1].
    s must not be empty (checked with assert).

    Reference:
    T. Kasai, G. Lee, H. Arimura, S. Arikawa, and K. Park,
    Linear-Time Longest-Common-Prefix Computation in Suffix Arrays and Its
    Applications
    '''

    if isinstance(s, str):
        s = [ord(c) for c in s]

    n = len(s)
    assert n >= 1

    # rnk is the inverse permutation of sa.
    rnk = [0] * n
    for i, pos in enumerate(sa):
        rnk[pos] = i

    lcp = [0] * (n - 1)
    h = 0
    for i in range(n):
        # Moving from suffix i - 1 to suffix i loses at most one character
        # of the previous match, so h never has to restart from zero.
        if h > 0:
            h -= 1
        if rnk[i] == 0:
            continue
        j = sa[rnk[i] - 1]
        while j + h < n and i + h < n:
            if s[j + h] != s[i + h]:
                break
            h += 1
        lcp[rnk[i] - 1] = h

    return lcp


def z_algorithm(s: typing.Union[str, typing.List[int]]) -> typing.List[int]:
    '''
    Return the Z array of s: entry i is the length of the longest common
    prefix of s and s[i:]. Entry 0 is len(s); an empty input gives [].

    Reference:
    D. Gusfield,
    Algorithms on Strings, Trees, and Sequences: Computer Science and
    Computational Biology
    '''

    if isinstance(s, str):
        s = [ord(c) for c in s]

    n = len(s)
    if n == 0:
        return []

    z = [0] * n
    j = 0  # start of the match that currently reaches furthest right
    for i in range(1, n):
        # Reuse what is already known from the match starting at j.
        z[i] = 0 if j + z[j] <= i else min(j + z[j] - i, z[i - j])
        while i + z[i] < n and s[z[i]] == s[i + z[i]]:
            z[i] += 1
        if j + z[j] < i + z[i]:
            j = i
    z[0] = n

    return z


# ---------------------------------------------------------------------------
# example/sa_practice.py
# https://atcoder.jp/contests/practice2/tasks/practice2_i
#
# In the original project layout this is a separate script that imports
# suffix_array and lcp_array from atcoder.string.
# ---------------------------------------------------------------------------


def main() -> None:
    '''
    Read one string from stdin and print its number of distinct non-empty
    substrings: all n * (n + 1) / 2 substrings minus the sum of the LCP
    array.
    '''

    s = sys.stdin.readline().strip()
    if not s:
        # lcp_array() rejects empty input; an empty string has no substrings.
        print(0)
        return

    sa = suffix_array(s)
    answer = len(s) * (len(s) + 1) // 2 - sum(lcp_array(s, sa))
    print(answer)


if __name__ == '__main__':
    main()