Algorithm (Binary Search + Rolling Hash)
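- Let $f(x)$ be the cardinality of the set of duplicate substrings with length $x$. The count $f$ itself need not be monotone, but the predicate $f(x) > 0$ is: every prefix of a duplicated substring is itself duplicated, so $f(x) > 0$ implies $f(y) > 0$ for all $1 \leq y \leq x$.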
- The problem then could be reduced to computing $$\sup\{x: f(x) > 0 \text{ and } 1 \leq x \leq |S|, x \in \mathbb{Z}\}$$
- Note that the supremum in step (2) can be found by binary search over $x$, because the predicate $f(x) > 0$ is monotone in $x$.
- Note that $f(x)$ can be computed using a Rolling Polynomial Hash on $\mathbb{Z}_p$, where $p$ is a relatively large prime.
- It is an interesting exercise to compute the lower bound of $p$ such that $\mathbb{P}(\text{hash collision}) < 10^{-5}$. We could also use double rolling hash with two primes near $10^9 + 7$ to reduce the probability of collision.
Time Complexity
- Evaluating $f$ once is $O(|S|)$.
- Multiplied by binary search complexity, the total running time is $O(|S|\log|S|)$.
Memory
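- $O(|S|\log|S|)$ if every evaluation of $f$ keeps its own hash set alive. This can be reduced to $O(|S|)$ by reusing a single memo set for the rolling hash that is cleared before each evaluation, which is what the code below does.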
Code
class Solution {
public:
string longestDupSubstring(string S) {
typedef std::integral_constant<long long, (long long)(1049179854847)> mod;
using math::field::Z;
const int n = S.length();
const auto power = [&](vector<Z<mod>> self = {}) {
self.resize(n, Z<mod>{1});
for (int i = 1; i < n; i++)
self[i] = self[i - 1] * 256;
return self;
}();
auto binary_search = [&](int lo, int hi, optional<pair<int, int>> target, auto f) {
optional<pair<int, int>> result = {};
auto search = rec([&](auto&& search, int lo, int hi) -> void {
int mid = lo + (hi - lo) / 2;
if (lo > hi) return;
else if (const auto fmid = f(mid); fmid) (result = fmid, search(mid + 1, hi));
else if (not fmid) search(lo, mid - 1);
});
return search(lo, hi), result;
};
auto ascii_code = [&](char ch) { return 0 + ch; };
auto f = [&, table = unordered_set<Z<mod>> {}](int len) mutable -> optional<pair<int, int>> {
table.clear();
auto rolling_hash = [&](Z<mod> acc = 0) {
for (int i = 0; i < len; ++i) acc = acc * 256 + ascii_code(S[i]);
return acc;
}();
table.insert(rolling_hash);
for (int i = len; i < n; i++) {
rolling_hash = 256 * (rolling_hash - power[len - 1] * ascii_code(S[i - len])) + ascii_code(S[i]);
if(auto [_, success] = table.insert(rolling_hash); not success)
return {pair(i - len + 1, len)};
}
return {};
};
const auto solution = [&] {
if (const auto result = binary_search(1, n, std::nullopt, f); result)
return S.substr((*result).first, (*result).second);
else
return std::string();
}();
return solution;
}};
Utility Code
/// Wrapper that lets a lambda recurse: calling the wrapper invokes the stored
/// callable with a std::reference_wrapper to the wrapper itself as the first
/// argument, followed by the caller's arguments (perfectly forwarded).
template <class Callable>
struct recursive {
    Callable f;  // the wrapped callable

    /// Invocation on a const wrapper; the callable receives a reference to const.
    template <class... Args>
    decltype(auto) operator()(Args&&... args) const {
        return f(std::ref(*this), std::forward<Args>(args)...);
    }

    /// Invocation on a mutable wrapper.
    template <class... Args>
    decltype(auto) operator()(Args&&... args) {
        return f(std::ref(*this), std::forward<Args>(args)...);
    }
};

/// Deduction guide so that `recursive{callable}` deduces the callable type.
template <class Callable>
recursive(Callable) -> recursive<Callable>;

/// Convenience factory: `rec(lambda)` yields a self-referential callable.
auto const rec = [](auto callable) {
    return recursive<decltype(callable)>{std::move(callable)};
};
namespace math::field {

/**
 * Extended Euclidean algorithm.
 *
 * @param a value to invert
 * @param m modulus
 * @return u with u * a == 1 (mod m). The result may be negative; callers are
 *         expected to reduce it (Z's constructor does).
 * Asserts that gcd(a, m) == 1.
 */
template <typename T>
T inverse(T a, T m) {
    using std::swap;  // works for fundamental types without `using namespace std`
    T u = 0, v = 1;
    while (a != 0) {
        T t = m / a;
        m -= t * a; swap(a, m);
        u -= t * v; swap(u, v);
    }
    assert(m == 1);
    return u;
}

/**
 * Element of the ring of integers modulo T::value.
 *
 * T is a type exposing `value_type` and a static `value` (for example
 * std::integral_constant<long long, P>). The stored residue is always kept
 * in the range [0, mod()).
 */
template <typename T>
class Z {
public:
    using value_type = typename T::value_type;
    constexpr static value_type mod() { return T::value; }

    // constructors
    constexpr Z() : value() {}
    template <typename U> Z(const U& x) { value = normalize(x); }

    /// Reduces any integer x to its representative in [0, mod()).
    template <typename U> static value_type normalize(const U& x) {
        if (0 <= x and x < mod()) return value_type(x);  // already reduced
        // `%` truncates toward zero, so the remainder of a negative x lies in
        // (-mod(), 0] and needs a single correction. This also covers
        // x <= -mod(), which a bare `x + mod()` would leave negative.
        value_type r = value_type(x % mod());
        if (r < 0) r += mod();
        return r;
    }

    Z& operator+=(const Z& other) { if ((value += other.value) >= mod()) value -= mod(); return *this; }
    Z& operator-=(const Z& other) { if ((value -= other.value) < 0) value += mod(); return *this; }
    Z& operator++() { return *this += 1; }
    Z& operator--() { return *this -= 1; }
    Z operator++(int) { Z result(*this); *this += 1; return result; }
    Z operator--(int) { Z result(*this); *this -= 1; return result; }
    Z operator-() const { return Z(-value); }

    /// Modular product; the intermediate is widened so that two residues
    /// close to a 64-bit-sized modulus cannot overflow.
    Z& operator*=(const Z& rhs) { value = mul_mod(value, rhs.value); return *this; }

    /// Division by multiplying with the modular inverse (other must be
    /// coprime to mod(); `inverse` asserts this).
    Z& operator/=(const Z& other) { return *this *= Z(inverse(other.value, mod())); }

    /// Read access to the stored residue.
    const value_type& operator()() const { return value; }
    template <typename U> explicit operator U() const { return static_cast<U>(value); }

    template <typename U> Z& operator+=(const U& other) { return *this += Z(other); }
    template <typename U> Z& operator-=(const U& other) { return *this -= Z(other); }

    template <typename U> friend bool operator==(const Z<U>& lhs, const Z<U>& rhs);
    template <typename U> friend bool operator<(const Z<U>& lhs, const Z<U>& rhs);
    template <typename U> friend std::istream& operator>>(std::istream& stream, Z<U>& number);

private:
    /// a * b mod mod() for residues a, b in [0, mod()).
    static value_type mul_mod(value_type a, value_type b) {
#if defined(__SIZEOF_INT128__)
        using wide = unsigned __int128;
        return value_type(wide(a) * wide(b) % wide(mod()));
#else
        // Portable fallback: double-and-add. Every intermediate stays below
        // 2 * mod(), which fits as long as mod() < 2^63.
        using wide = unsigned long long;
        const wide m = wide(mod());
        wide x = wide(a), y = wide(b), acc = 0;
        while (y != 0) {
            if (y & 1) { acc += x; if (acc >= m) acc -= m; }
            x += x; if (x >= m) x -= m;
            y >>= 1;
        }
        return value_type(acc);
#endif
    }

    value_type value;
};

/// Residues are non-negative by construction, so abs is the identity.
/// Defined at namespace scope (not as an in-class friend definition) so that
/// Z may be instantiated with more than one modulus in the same program.
template <typename T> const Z<T>& abs(const Z<T>& v) { return v; }

// Comparisons. The mixed overloads convert the plain integer operand to Z first.
template <typename T> bool operator==(const Z<T>& lhs, const Z<T>& rhs) { return lhs.value == rhs.value; }
template <typename T, typename U> bool operator==(const Z<T>& lhs, U rhs) { return lhs == Z<T>(rhs); }
template <typename T, typename U> bool operator==(U lhs, const Z<T>& rhs) { return Z<T>(lhs) == rhs; }
template <typename T> bool operator!=(const Z<T>& lhs, const Z<T>& rhs) { return !(lhs == rhs); }
template <typename T, typename U> bool operator!=(const Z<T>& lhs, U rhs) { return !(lhs == rhs); }
template <typename T, typename U> bool operator!=(U lhs, const Z<T>& rhs) { return !(lhs == rhs); }
template <typename T> bool operator<(const Z<T>& lhs, const Z<T>& rhs) { return lhs.value < rhs.value; }

// Binary arithmetic, implemented through the compound-assignment members.
template <typename T> Z<T> operator+(const Z<T>& lhs, const Z<T>& rhs) { return Z<T>(lhs) += rhs; }
template <typename T, typename U> Z<T> operator+(const Z<T>& lhs, U rhs) { return Z<T>(lhs) += rhs; }
template <typename T, typename U> Z<T> operator+(U lhs, const Z<T>& rhs) { return Z<T>(lhs) += rhs; }
template <typename T> Z<T> operator-(const Z<T>& lhs, const Z<T>& rhs) { return Z<T>(lhs) -= rhs; }
template <typename T, typename U> Z<T> operator-(const Z<T>& lhs, U rhs) { return Z<T>(lhs) -= rhs; }
template <typename T, typename U> Z<T> operator-(U lhs, const Z<T>& rhs) { return Z<T>(lhs) -= rhs; }
template <typename T> Z<T> operator*(const Z<T>& lhs, const Z<T>& rhs) { return Z<T>(lhs) *= rhs; }
template <typename T, typename U> Z<T> operator*(const Z<T>& lhs, U rhs) { return Z<T>(lhs) *= rhs; }
template <typename T, typename U> Z<T> operator*(U lhs, const Z<T>& rhs) { return Z<T>(lhs) *= rhs; }
template <typename T> Z<T> operator/(const Z<T>& lhs, const Z<T>& rhs) { return Z<T>(lhs) /= rhs; }
template <typename T, typename U> Z<T> operator/(const Z<T>& lhs, U rhs) { return Z<T>(lhs) /= rhs; }
template <typename T, typename U> Z<T> operator/(U lhs, const Z<T>& rhs) { return Z<T>(lhs) /= rhs; }

/// a raised to the non-negative integer power b, by binary exponentiation.
template<typename T, typename U>
Z<T> power(const Z<T>& a, const U& b) {
    assert(b >= 0);
    Z<T> x = a, res = 1;
    U p = b;
    while (p > 0) {
        if (p & 1) res *= x;  // this bit of the exponent is set
        x *= x;
        p >>= 1;
    }
    return res;
}

/// True when the residue is 0.
template <typename T>
bool is_zero(const Z<T>& number) {
    return number() == 0;
}

} // end of namespace
namespace std {

/// Hash support for math::field::Z so it can be used as a key in unordered
/// containers: the hash of an element is the hash of its stored residue.
template <typename Modulus>
struct hash<math::field::Z<Modulus>> {
    std::size_t operator()(const math::field::Z<Modulus>& element) const noexcept {
        using residue_type = typename Modulus::value_type;
        const std::hash<residue_type> residue_hasher{};
        return residue_hasher(element());
    }
};

}  // namespace std