|
| 1 | +"""Program for Bad Character Heuristic |
| 2 | +of Boyer Moore String Matching Algorithm""" |
| 3 | + |
| 4 | + |
# Initial size of the last-occurrence table (covers code points 0..255).
NO_OF_CHARS = 256


def badCharHeuristic(string, size):
    """Build the last-occurrence table for the bad character heuristic.

    Args:
        string: The pattern to preprocess.
        size: Number of leading characters of ``string`` to process.

    Returns:
        A list indexed by character code point (``ord(ch)``). Each entry
        holds the index of the last occurrence of that character within
        the first ``size`` characters of ``string``, or -1 if the
        character does not occur there. The list has at least
        ``NO_OF_CHARS`` entries and is extended when the pattern contains
        a character whose code point is ``NO_OF_CHARS`` or greater.
    """
    # Initialize all occurrences as -1
    badChar = [-1] * NO_OF_CHARS

    # Record the last occurrence of every character. Later indices
    # overwrite earlier ones, so the final value is the rightmost position.
    for i in range(size):
        code = ord(string[i])
        if code >= len(badChar):
            # Grow the table instead of raising IndexError for characters
            # beyond the initial 256 code points (e.g. non-Latin-1 text).
            badChar.extend([-1] * (code + 1 - len(badChar)))
        badChar[code] = i

    return badChar
| 21 | + |
def search(txt, pat):
    """Print every shift at which ``pat`` occurs in ``txt``.

    Uses the bad character heuristic of the Boyer-Moore algorithm: the
    pattern is compared against the text from right to left, and on a
    mismatch it is shifted so that the offending text character lines up
    with its last occurrence in the pattern.

    Args:
        txt: The text to search in.
        pat: The pattern to search for. An empty pattern is reported at
            every shift from 0 to ``len(txt)``.

    Returns:
        None. Each match is reported on standard output as
        ``Pattern occurs at shift = <s>``.
    """
    m = len(pat)
    n = len(txt)

    # Last-occurrence table of the pattern, indexed by code point.
    badChar = badCharHeuristic(pat, m)
    table_size = len(badChar)

    def last_occurrence(ch):
        # Bounds-check the lookup: a text character whose code point lies
        # outside the table is treated as absent from the pattern (-1)
        # instead of raising IndexError.
        code = ord(ch)
        return badChar[code] if code < table_size else -1

    # s is the shift of the pattern with respect to the text
    s = 0
    while s <= n - m:
        j = m - 1

        # Keep reducing index j of the pattern while characters of the
        # pattern and text are matching at this shift s
        while j >= 0 and pat[j] == txt[s + j]:
            j -= 1

        if j < 0:
            # The whole pattern matched at the current shift.
            print("Pattern occurs at shift =", s)

            # Shift the pattern so that the next character in the text
            # aligns with the last occurrence of it in the pattern. The
            # condition s + m < n guards the case where the pattern
            # occurs at the very end of the text.
            s += m - last_occurrence(txt[s + m]) if s + m < n else 1
        else:
            # Shift the pattern so that the bad character in the text
            # aligns with the last occurrence of it in the pattern. max()
            # guarantees a positive shift: the raw value is non-positive
            # when the last occurrence of the bad character lies to the
            # right of the current position j.
            s += max(1, j - last_occurrence(txt[s + j]))
| 68 | + |
def main():
    """Interactively read text/pattern pairs and print the match shifts.

    Loops until the user enters an empty text or standard input reaches
    end-of-file.
    """
    while True:
        try:
            txt = input('Enter the text (or press Enter to exit): ')
            if not txt:
                break
            pat = input('Enter the pattern to search for: ')
        except EOFError:
            # stdin was closed (e.g. piped input ran out); exit quietly
            # instead of crashing with a traceback.
            break
        search(txt, pat)


# Run the interactive loop only when executed as a script, so that
# importing this module does not block on input().
if __name__ == "__main__":
    main()
0 commit comments