1. Searching in columns
For this assignment you are to write a program search.c that finds all occurrences of a given string (the needle) in a text (the haystack). However, unlike standard-issue search programs like grep, your program should find vertical occurrences of the needle, when reading the text down in columns.
2. Example
In the text
aba ab ba
the string ba occurs in exactly one place: starting from column 1 of line 1 (counting from 0 in both cases):
aba aB bA
3. Input format
Your program should read the haystack from stdin. It should obtain the needle from argv[1]. It is good practice to test argc to make sure that argv[1] exists, but you will not be tested on any inputs that do not provide exactly one argument. You may assume that each line of the haystack is terminated by a newline and that neither the haystack nor the needle contain any null characters.
4. Output format
For each starting line and column where the needle matches, your program should output a single line of text giving the starting line and column numbers (counting from 0). This line should be formatted as if printed with the function call
1 printf("%d %d\n", line, column);
If there are multiple matches, each should be printed in order of increasing line number first and then increasing column numbers within the same line. For example, the output from a search for aa in the haystack
aaa aa a
should be
0 0 0 1 1 0
5. Things to watch out for
5.1. Special characters
Your program should allow any character except '\0' or '\n' within both the haystack and needle. The column number of a character is defined as the number of characters that precede it in the same line, regardless of how it looks when printed on the screen (e.g. a tab character '\t' may produce up to 8 spaces on the screen but still counts as only one character for the purpose of computing column positions).
5.2. Short lines
A match can occur only in a column only if there are enough characters in each of the lines that the column spans. You will need to do something to make sure that searches do not attempt to include missing characters from short lines.
6. Submitting your program
Submit your program as usual with the command
/c/cs223/bin/submit 3 search.c
7. Testing your program
A public test script is available on the Zoo as /c/cs223/Hwk3/test.search.
8. Sample solutions
1 /* Vertical word search program */
2 /* Takes as input a sequence of lines of possibly varying length. */
3 /* Searches each column for argv[1] */
4 /* Prints the line number and column number of any matching locations (counting from 0) */
5 /* in increasing order by line number then column number. */
6
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10
11 typedef struct line {
12 int len; /* length of data */
13 char *data; /* null-terminated contents */
14 } *Line;
15
16 /* used internally by getline; initial size of buffer */
17 #define INITIAL_LINE_SIZE (16)
18
19 /* reads a line of text terminated by newline or end-of-file and returns it */
20 /* returns 0 on allocation error or end-of-file at start of line */
21 Line
22 getline(FILE *f)
23 {
24 int size;
25 Line line;
26 int c;
27
28 line = malloc(sizeof(*line));
29 if(line == 0) return 0;
30
31 size = INITIAL_LINE_SIZE;
32 line->len = 0;
33 line->data = malloc(size);
34
35 for(;;) {
36 /* grow the buffer if full */
37 if(line->len >= size) {
38 size *= 2;
39 line->data = realloc(line->data, size);
40 if(line->data == 0) {
41 free(line);
42 return 0;
43 }
44 }
45 /* grab and parse the next character */
46 c = getc(f);
47 if(c == '\n') {
48 /* we are done */
49 break;
50 } else if(c == EOF) {
51 if(line->len == 0) {
52 /* end of file at start of line */
53 free(line->data);
54 free(line);
55 return 0;
56 } else {
57 /* we are done again */
58 break;
59 }
60 } else {
61 /* add it in */
62 line->data[line->len++] = c;
63 }
64 }
65
66 /* we are done; clean up */
67 /* mark end of string */
68 line->data[line->len] = '\0';
69 /* give up extra space in buffer */
70 line->data = realloc(line->data, line->len + 1);
71 return line;
72 }
73
74 void
75 free_line(Line line)
76 {
77 free(line->data);
78 free(line);
79 }
80
81 #define INITIAL_LINES (16) /* initial size of lines buffer in getlines */
82
83 /* get a bunch of lines, terminating when getline fails */
84 /* returns an array of the lines in order, with the number of lines stored
85 * in *n */
86 /* returns 0 on allocation error */
87 Line *
88 getlines(FILE *f, int *n)
89 {
90 int size;
91 Line *lines;
92 Line next_line;
93
94 size = INITIAL_LINES;
95 *n = 0;
96 lines = malloc(sizeof(*lines) * size);
97 if(lines == 0) return 0;
98
99 while((next_line = getline(f)) != 0) {
100 /* maybe grow buffer */
101 if(*n >= size) {
102 size *= 2;
103 lines = realloc(lines, sizeof(*lines) * size);
104 if(lines == 0) return 0;
105 }
106 /* store the new line */
107 lines[(*n)++] = next_line;
108 }
109
110 /* shrink to fit */
111 lines = realloc(lines, sizeof(*lines) * (*n));
112
113 return lines;
114 }
115
116 void
117 free_lines(Line *lines, int n)
118 {
119 int i;
120
121 for(i = 0; i < n; i++) {
122 free_line(lines[i]);
123 }
124 free(lines);
125 }
126
127 /* return true if needle occurs in first numlines entries in lines */
128 /* starting at position (line, column) */
129 int
130 occurs_at(Line *lines, int numlines, int line, int column, const char *needle)
131 {
132 /* strategy: we'll march line and needle together, returning 0 on mismatch */
133 while(line < numlines && *needle) {
134 if(lines[line]->len <= column) {
135 /* this line isn't long enough, we lose */
136 return 0;
137 } else if(lines[line]->data[column] != *needle) {
138 /* character doesn't match, we lose again */
139 return 0;
140 } else {
141 /* keep going */
142 line++;
143 needle++;
144 }
145 }
146
147 /* did we reach end of needle or end of lines? */
148 return *needle == '\0';
149 }
150
151 int
152 main(int argc, char **argv)
153 {
154 Line *lines;
155 int n;
156 int maxstart; /* n - length of needle; last possible starting location */
157 int line;
158 int column;
159 const char *needle;
160
161 if(argc != 2) {
162 fprintf(stderr, "Usage: %s needle < lines\n", argv[0]);
163 return 1;
164 }
165
166 needle = argv[1];
167
168 lines = getlines(stdin, &n);
169
170 maxstart = n - strlen(needle);
171
172 for(line = 0; line <= maxstart; line++) {
173 for(column = 0; column < lines[line]->len; column++) {
174 if(occurs_at(lines, n, line, column, needle)) {
175 printf("%d %d\n", line, column);
176 }
177 }
178 }
179
180 free_lines(lines, n);
181
182 return 0;
183 }