Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
File renamed without changes.
104 changes: 104 additions & 0 deletions Algorithms/SuffixArrayLCP.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/*
* File: SuffixArrayLCP.java
* Author: Shashank S
* Description:
* This program constructs a Suffix Array and computes the Longest Common Prefix (LCP) array for a given string.
* The Suffix Array provides an efficient way to represent all suffixes of a string in lexicographical order,
* while the LCP array stores the length of the longest common prefix between consecutive suffixes.
* These data structures are fundamental for solving problems in string processing such as pattern matching,
* substring queries, and text compression algorithms.
* Date: 2025-10-16
*/


import java.util.*;

/**
 * Suffix array construction by prefix doubling, the LCP array via Kasai's
 * method, and a distinct-substring counter built on top of both.
 */
public class SuffixArrayLCP {

    /**
     * Looks up the rank stored at {@code pos}, yielding {@code -1} when
     * {@code pos} lies beyond the end of the rank table so that a shorter
     * suffix orders before any longer suffix sharing its prefix.
     */
    private static int rankOrSentinel(int[] rank, int pos) {
        return pos < rank.length ? rank[pos] : -1;
    }

    /**
     * Builds the suffix array of {@code s}.
     *
     * @param s the text whose suffixes are to be ordered
     * @return the starting indices of all suffixes of {@code s}, arranged in
     *         lexicographical order of the suffixes (by UTF-16 code unit)
     */
    public static Integer[] buildSA(String s) {
        final int len = s.length();
        final Integer[] order = new Integer[len];
        final int[] rank = new int[len];
        final int[] refreshed = new int[len];

        // Round zero: every suffix is ranked by its first character.
        for (int pos = 0; pos < len; pos++) {
            order[pos] = pos;
            rank[pos] = s.charAt(pos);
        }

        for (int step = 1; step < len; step <<= 1) {
            final int gap = step;

            // Orders suffixes by the pair (rank at start, rank at start + gap).
            final Comparator<Integer> byRankPair = (x, y) -> {
                int head = Integer.compare(rank[x], rank[y]);
                if (head != 0) {
                    return head;
                }
                return Integer.compare(rankOrSentinel(rank, x + gap), rankOrSentinel(rank, y + gap));
            };
            Arrays.sort(order, byRankPair);

            // Re-rank: neighbours with an identical pair keep the same rank.
            // 'rank' is still the previous round's table here, so the comparator
            // is safe to reuse for the equality test.
            refreshed[order[0]] = 0;
            for (int idx = 1; idx < len; idx++) {
                int before = order[idx - 1];
                int here = order[idx];
                boolean samePair = byRankPair.compare(before, here) == 0;
                refreshed[here] = samePair ? refreshed[before] : refreshed[before] + 1;
            }
            System.arraycopy(refreshed, 0, rank, 0, len);

            // Once the largest rank is len - 1 every suffix is distinct: done.
            if (rank[order[len - 1]] == len - 1) {
                break;
            }
        }
        return order;
    }

    /**
     * Computes the LCP array that accompanies a suffix array.
     *
     * @param s  the text
     * @param sa the suffix array of {@code s}, as produced by {@link #buildSA(String)}
     * @return an array where entry {@code r} (for {@code r > 0}) is the length of
     *         the longest common prefix of the suffixes at positions {@code r - 1}
     *         and {@code r} of {@code sa}; entry {@code 0} is left at {@code 0}
     */
    public static int[] buildLCP(String s, Integer[] sa) {
        final int len = s.length();

        // Inverse permutation: where each suffix sits inside the suffix array.
        final int[] slotOf = new int[len];
        for (int slot = 0; slot < len; slot++) {
            slotOf[sa[slot]] = slot;
        }

        final int[] result = new int[len];
        int matched = 0;
        for (int start = 0; start < len; start++) {
            final int slot = slotOf[start];
            if (slot == 0) {
                // The smallest suffix has no predecessor to compare against.
                continue;
            }
            final int neighbour = sa[slot - 1];
            while (start + matched < len
                    && neighbour + matched < len
                    && s.charAt(start + matched) == s.charAt(neighbour + matched)) {
                matched++;
            }
            result[slot] = matched;
            // The next suffix (start + 1) is guaranteed at least matched - 1.
            matched = Math.max(matched - 1, 0);
        }
        return result;
    }

    /**
     * Counts the distinct non-empty substrings of {@code s}: the total number
     * of (start, length) choices minus the overlap recorded in the LCP array.
     *
     * @param s the text
     * @return the number of distinct non-empty substrings
     */
    public static long countDistinctSubstrings(String s) {
        final int n = s.length();
        final int[] lcp = buildLCP(s, buildSA(s));
        final long allSubstrings = (long) n * (n + 1) / 2;
        final long duplicated = Arrays.stream(lcp).asLongStream().sum();
        return allSubstrings - duplicated;
    }

    /**
     * Demo entry point: prints the suffix array, suffixes and LCP values for
     * the first command-line argument, or for "banana" when none is given.
     */
    public static void main(String[] args) {
        final String s = args.length > 0 ? args[0] : "banana";
        final Integer[] sa = buildSA(s);
        final int[] lcp = buildLCP(s, sa);

        System.out.println("String: " + s);
        System.out.println("Index\tSA\tSuffix\tLCP");
        for (int row = 0; row < s.length(); row++) {
            System.out.printf("%d\t%d\t%s\t%d\n", row, sa[row], s.substring(sa[row]), lcp[row]);
        }
        System.out.println("Distinct substrings: " + countDistinctSubstrings(s));
    }
}

/*
How to compile and run:
javac SuffixArrayLCP.java
java SuffixArrayLCP
# or with custom input
java SuffixArrayLCP "abracadabra"
*/
115 changes: 115 additions & 0 deletions Algorithms/SuffixAutomation.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/*
* File: SuffixAutomation.java
* Author: Shashank S
* Description:
* This program constructs a Suffix Automaton for a given string, extending it one character at a time.
* The automaton is then used to count the number of distinct substrings of the string
* and to find the length of the longest common substring between that string and a second string.
* Date: 2025-10-16
*/

// SuffixAutomation.java
// Usage: javac SuffixAutomation.java && java SuffixAutomation
import java.util.*;

/**
 * Suffix automaton of a single string, built online one character at a time.
 * Supports counting distinct substrings and finding the length of the longest
 * substring shared with another string.
 */
public class SuffixAutomation {

    /** One automaton state: longest string length, suffix link and outgoing edges. */
    static class State {
        int len, link;
        Map<Character, Integer> next = new HashMap<>();

        State(int len) {
            this.len = len;
            this.link = -1; // -1 marks "no suffix link" (only the root keeps it)
        }
    }

    private final ArrayList<State> states;
    private int last;

    /**
     * Builds the automaton for {@code s} by feeding its characters in order.
     *
     * @param s the string to index
     */
    public SuffixAutomation(String s) {
        states = new ArrayList<>();
        states.add(new State(0)); // state 0 is the root
        last = 0;
        for (char c : s.toCharArray()) {
            extend(c);
        }
    }

    /** Appends character {@code c} to the indexed string and updates the automaton. */
    private void extend(char c) {
        final int fresh = states.size();
        states.add(new State(states.get(last).len + 1));

        // Walk the suffix links from the previous end state, adding the missing
        // c-edges, until a state that already has one (or the root's parent).
        int walker = last;
        last = fresh;
        while (walker != -1) {
            Map<Character, Integer> edges = states.get(walker).next;
            if (edges.containsKey(c)) {
                break;
            }
            edges.put(c, fresh);
            walker = states.get(walker).link;
        }

        if (walker == -1) {
            states.get(fresh).link = 0;
            return;
        }

        final State pivot = states.get(walker);
        final int target = pivot.next.get(c);
        final State targetState = states.get(target);
        if (pivot.len + 1 == targetState.len) {
            // The existing edge is "solid": target can serve as the link directly.
            states.get(fresh).link = target;
            return;
        }

        // Otherwise split target: the copy keeps its edges and link but takes
        // the shorter length, and edges that led to target are redirected.
        final int copyId = states.size();
        final State copy = new State(pivot.len + 1);
        copy.next.putAll(targetState.next);
        copy.link = targetState.link;
        states.add(copy);

        for (int p = walker; p != -1 && states.get(p).next.get(c) == target; p = states.get(p).link) {
            states.get(p).next.put(c, copyId);
        }
        targetState.link = copyId;
        states.get(fresh).link = copyId;
    }

    /**
     * Counts distinct non-empty substrings of the indexed string as the sum of
     * {@code len[v] - len[link[v]]} over every non-root state {@code v}.
     *
     * @return the number of distinct non-empty substrings
     */
    public long distinctSubstringsCount() {
        return states.stream()
                .skip(1)
                .mapToLong(v -> v.len - states.get(v.link).len)
                .sum();
    }

    /**
     * Finds the length of the longest substring common to the indexed string
     * and {@code t}.
     *
     * @param t the other string
     * @return the length of the longest common substring, {@code 0} if none
     */
    public int longestCommonSubstring(String t) {
        int state = 0;
        int matched = 0;
        int longest = 0;
        for (int i = 0; i < t.length(); i++) {
            final char c = t.charAt(i);
            final Integer direct = states.get(state).next.get(c);
            if (direct != null) {
                // The current match can simply be extended by one character.
                state = direct;
                matched++;
            } else {
                // Shorten the match along suffix links until c can be consumed.
                int cursor = state;
                do {
                    cursor = states.get(cursor).link;
                } while (cursor != -1 && !states.get(cursor).next.containsKey(c));

                if (cursor == -1) {
                    state = 0;
                    matched = 0;
                } else {
                    matched = states.get(cursor).len + 1;
                    state = states.get(cursor).next.get(c);
                }
            }
            longest = Math.max(longest, matched);
        }
        return longest;
    }

    /**
     * Demo entry point: builds the automaton for the first command-line
     * argument (or "ababa") and prints its statistics.
     */
    public static void main(String[] args) {
        final String s = args.length > 0 ? args[0] : "ababa";
        final SuffixAutomation sam = new SuffixAutomation(s);
        System.out.println("Original string: " + s);
        System.out.println("Number of states: " + sam.states.size());
        System.out.println("Distinct substrings count: " + sam.distinctSubstringsCount());

        // Longest-common-substring check against a second string.
        final String t = "baba";
        System.out.println("LCS with '" + t + "': " + sam.longestCommonSubstring(t));
    }
}
// Usage: javac SuffixAutomation.java && java SuffixAutomation

//How to run
/*
javac SuffixAutomation.java
java SuffixAutomation
# with custom input:
java SuffixAutomation "banana"
*/