testRegexp.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402
  1. /*
  2. * testRegexp.c: simple module for testing regular expressions
  3. *
  4. * See Copyright for the status of this software.
  5. *
  6. * Daniel Veillard <veillard@redhat.com>
  7. */
  8. #include "libxml.h"
  9. #ifdef LIBXML_REGEXP_ENABLED
  10. #include <string.h>
  11. #include <libxml/tree.h>
  12. #include <libxml/xmlregexp.h>
  /* Command-line switches; both are incremented by main() while parsing argv. */
  static int debug = 0;    /* -debug / --debug: enables the extra dumps */
  static int repeat = 0;   /* -repeat / --repeat: re-run each regexp match in a loop */
  15. static void testRegexp(xmlRegexpPtr comp, const char *value) {
  16. int ret;
  17. ret = xmlRegexpExec(comp, (const xmlChar *) value);
  18. if (ret == 1)
  19. printf("%s: Ok\n", value);
  20. else if (ret == 0)
  21. printf("%s: Fail\n", value);
  22. else
  23. printf("%s: Error: %d\n", value, ret);
  24. if (repeat) {
  25. int j;
  26. for (j = 0;j < 999999;j++)
  27. xmlRegexpExec(comp, (const xmlChar *) value);
  28. }
  29. }
  30. static void
  31. testRegexpFile(const char *filename) {
  32. xmlRegexpPtr comp = NULL;
  33. FILE *input;
  34. char expression[5000];
  35. int len;
  36. input = fopen(filename, "r");
  37. if (input == NULL) {
  38. xmlGenericError(xmlGenericErrorContext,
  39. "Cannot open %s for reading\n", filename);
  40. return;
  41. }
  42. while (fgets(expression, 4500, input) != NULL) {
  43. len = strlen(expression);
  44. len--;
  45. while ((len >= 0) &&
  46. ((expression[len] == '\n') || (expression[len] == '\t') ||
  47. (expression[len] == '\r') || (expression[len] == ' '))) len--;
  48. expression[len + 1] = 0;
  49. if (len >= 0) {
  50. if (expression[0] == '#')
  51. continue;
  52. if ((expression[0] == '=') && (expression[1] == '>')) {
  53. char *pattern = &expression[2];
  54. if (comp != NULL) {
  55. xmlRegFreeRegexp(comp);
  56. comp = NULL;
  57. }
  58. printf("Regexp: %s\n", pattern) ;
  59. comp = xmlRegexpCompile((const xmlChar *) pattern);
  60. if (comp == NULL) {
  61. printf(" failed to compile\n");
  62. break;
  63. }
  64. } else if (comp == NULL) {
  65. printf("Regexp: %s\n", expression) ;
  66. comp = xmlRegexpCompile((const xmlChar *) expression);
  67. if (comp == NULL) {
  68. printf(" failed to compile\n");
  69. break;
  70. }
  71. } else if (comp != NULL) {
  72. testRegexp(comp, expression);
  73. }
  74. }
  75. }
  76. fclose(input);
  77. if (comp != NULL)
  78. xmlRegFreeRegexp(comp);
  79. }
  80. #ifdef LIBXML_EXPR_ENABLED
  81. static void
  82. runFileTest(xmlExpCtxtPtr ctxt, const char *filename) {
  83. xmlExpNodePtr expr = NULL, sub;
  84. FILE *input;
  85. char expression[5000];
  86. int len;
  87. input = fopen(filename, "r");
  88. if (input == NULL) {
  89. xmlGenericError(xmlGenericErrorContext,
  90. "Cannot open %s for reading\n", filename);
  91. return;
  92. }
  93. while (fgets(expression, 4500, input) != NULL) {
  94. len = strlen(expression);
  95. len--;
  96. while ((len >= 0) &&
  97. ((expression[len] == '\n') || (expression[len] == '\t') ||
  98. (expression[len] == '\r') || (expression[len] == ' '))) len--;
  99. expression[len + 1] = 0;
  100. if (len >= 0) {
  101. if (expression[0] == '#')
  102. continue;
  103. if ((expression[0] == '=') && (expression[1] == '>')) {
  104. char *str = &expression[2];
  105. if (expr != NULL) {
  106. xmlExpFree(ctxt, expr);
  107. if (xmlExpCtxtNbNodes(ctxt) != 0)
  108. printf(" Parse/free of Expression leaked %d\n",
  109. xmlExpCtxtNbNodes(ctxt));
  110. expr = NULL;
  111. }
  112. printf("Expression: %s\n", str) ;
  113. expr = xmlExpParse(ctxt, str);
  114. if (expr == NULL) {
  115. printf(" parsing Failed\n");
  116. break;
  117. }
  118. } else if (expr != NULL) {
  119. int expect = -1;
  120. int nodes1, nodes2;
  121. if (expression[0] == '0')
  122. expect = 0;
  123. if (expression[0] == '1')
  124. expect = 1;
  125. printf("Subexp: %s", expression + 2) ;
  126. nodes1 = xmlExpCtxtNbNodes(ctxt);
  127. sub = xmlExpParse(ctxt, expression + 2);
  128. if (sub == NULL) {
  129. printf(" parsing Failed\n");
  130. break;
  131. } else {
  132. int ret;
  133. nodes2 = xmlExpCtxtNbNodes(ctxt);
  134. ret = xmlExpSubsume(ctxt, expr, sub);
  135. if ((expect == 1) && (ret == 1)) {
  136. printf(" => accept, Ok\n");
  137. } else if ((expect == 0) && (ret == 0)) {
  138. printf(" => reject, Ok\n");
  139. } else if ((expect == 1) && (ret == 0)) {
  140. printf(" => reject, Failed\n");
  141. } else if ((expect == 0) && (ret == 1)) {
  142. printf(" => accept, Failed\n");
  143. } else {
  144. printf(" => fail internally\n");
  145. }
  146. if (xmlExpCtxtNbNodes(ctxt) > nodes2) {
  147. printf(" Subsume leaked %d\n",
  148. xmlExpCtxtNbNodes(ctxt) - nodes2);
  149. nodes1 += xmlExpCtxtNbNodes(ctxt) - nodes2;
  150. }
  151. xmlExpFree(ctxt, sub);
  152. if (xmlExpCtxtNbNodes(ctxt) > nodes1) {
  153. printf(" Parse/free leaked %d\n",
  154. xmlExpCtxtNbNodes(ctxt) - nodes1);
  155. }
  156. }
  157. }
  158. }
  159. }
  160. if (expr != NULL) {
  161. xmlExpFree(ctxt, expr);
  162. if (xmlExpCtxtNbNodes(ctxt) != 0)
  163. printf(" Parse/free of Expression leaked %d\n",
  164. xmlExpCtxtNbNodes(ctxt));
  165. }
  166. fclose(input);
  167. }
  168. static void
  169. testReduce(xmlExpCtxtPtr ctxt, xmlExpNodePtr expr, const char *tst) {
  170. xmlBufferPtr xmlExpBuf;
  171. xmlExpNodePtr sub, deriv;
  172. xmlExpBuf = xmlBufferCreate();
  173. sub = xmlExpParse(ctxt, tst);
  174. if (sub == NULL) {
  175. printf("Subset %s failed to parse\n", tst);
  176. return;
  177. }
  178. xmlExpDump(xmlExpBuf, sub);
  179. printf("Subset parsed as: %s\n",
  180. (const char *) xmlBufferContent(xmlExpBuf));
  181. deriv = xmlExpExpDerive(ctxt, expr, sub);
  182. if (deriv == NULL) {
  183. printf("Derivation led to an internal error, report this !\n");
  184. return;
  185. } else {
  186. xmlBufferEmpty(xmlExpBuf);
  187. xmlExpDump(xmlExpBuf, deriv);
  188. if (xmlExpIsNillable(deriv))
  189. printf("Resulting nillable derivation: %s\n",
  190. (const char *) xmlBufferContent(xmlExpBuf));
  191. else
  192. printf("Resulting derivation: %s\n",
  193. (const char *) xmlBufferContent(xmlExpBuf));
  194. xmlExpFree(ctxt, deriv);
  195. }
  196. xmlExpFree(ctxt, sub);
  197. }
  198. static void
  199. exprDebug(xmlExpCtxtPtr ctxt, xmlExpNodePtr expr) {
  200. xmlBufferPtr xmlExpBuf;
  201. xmlExpNodePtr deriv;
  202. const char *list[40];
  203. int ret;
  204. xmlExpBuf = xmlBufferCreate();
  205. if (expr == NULL) {
  206. printf("Failed to parse\n");
  207. return;
  208. }
  209. xmlExpDump(xmlExpBuf, expr);
  210. printf("Parsed as: %s\n", (const char *) xmlBufferContent(xmlExpBuf));
  211. printf("Max token input = %d\n", xmlExpMaxToken(expr));
  212. if (xmlExpIsNillable(expr) == 1)
  213. printf("Is nillable\n");
  214. ret = xmlExpGetLanguage(ctxt, expr, (const xmlChar **) &list[0], 40);
  215. if (ret < 0)
  216. printf("Failed to get list: %d\n", ret);
  217. else {
  218. int i;
  219. printf("Language has %d strings, testing string derivations\n", ret);
  220. for (i = 0;i < ret;i++) {
  221. deriv = xmlExpStringDerive(ctxt, expr, BAD_CAST list[i], -1);
  222. if (deriv == NULL) {
  223. printf(" %s -> derivation failed\n", list[i]);
  224. } else {
  225. xmlBufferEmpty(xmlExpBuf);
  226. xmlExpDump(xmlExpBuf, deriv);
  227. printf(" %s -> %s\n", list[i],
  228. (const char *) xmlBufferContent(xmlExpBuf));
  229. }
  230. xmlExpFree(ctxt, deriv);
  231. }
  232. }
  233. xmlBufferFree(xmlExpBuf);
  234. }
  235. #endif
  /*
   * usage:
   * @name: program name shown in the first line of the help text
   *
   * Writes the command-line help to stderr.  The --expr line is only
   * included when LIBXML_EXPR_ENABLED is defined.
   */
  static void usage(const char *name) {
      static const char *const help[] = {
          "Testing tool for libxml2 string and pattern regexps\n",
          " --debug: switch on debugging\n",
          " --repeat: loop on the operation\n",
  #ifdef LIBXML_EXPR_ENABLED
          " --expr: test xmlExp and not xmlRegexp\n",
  #endif
          " --input filename: use the given filename for regexp\n",
          " --input filename: use the given filename for exp\n"
      };
      size_t k;

      fprintf(stderr, "Usage: %s [flags]\n", name);
      for (k = 0; k < sizeof(help) / sizeof(help[0]); k++)
          fputs(help[k], stderr);
  }
  247. int main(int argc, char **argv) {
  248. xmlRegexpPtr comp = NULL;
  249. #ifdef LIBXML_EXPR_ENABLED
  250. xmlExpNodePtr expr = NULL;
  251. int use_exp = 0;
  252. xmlExpCtxtPtr ctxt = NULL;
  253. #endif
  254. const char *pattern = NULL;
  255. char *filename = NULL;
  256. int i;
  257. xmlInitMemory();
  258. if (argc <= 1) {
  259. usage(argv[0]);
  260. return(1);
  261. }
  262. for (i = 1; i < argc ; i++) {
  263. if (!strcmp(argv[i], "-"))
  264. break;
  265. if (argv[i][0] != '-')
  266. continue;
  267. if (!strcmp(argv[i], "--"))
  268. break;
  269. if ((!strcmp(argv[i], "-debug")) || (!strcmp(argv[i], "--debug"))) {
  270. debug++;
  271. } else if ((!strcmp(argv[i], "-repeat")) ||
  272. (!strcmp(argv[i], "--repeat"))) {
  273. repeat++;
  274. #ifdef LIBXML_EXPR_ENABLED
  275. } else if ((!strcmp(argv[i], "-expr")) ||
  276. (!strcmp(argv[i], "--expr"))) {
  277. use_exp++;
  278. #endif
  279. } else if ((!strcmp(argv[i], "-i")) || (!strcmp(argv[i], "-f")) ||
  280. (!strcmp(argv[i], "--input")))
  281. filename = argv[++i];
  282. else {
  283. fprintf(stderr, "Unknown option %s\n", argv[i]);
  284. usage(argv[0]);
  285. }
  286. }
  287. #ifdef LIBXML_EXPR_ENABLED
  288. if (use_exp)
  289. ctxt = xmlExpNewCtxt(0, NULL);
  290. #endif
  291. if (filename != NULL) {
  292. #ifdef LIBXML_EXPR_ENABLED
  293. if (use_exp)
  294. runFileTest(ctxt, filename);
  295. else
  296. #endif
  297. testRegexpFile(filename);
  298. } else {
  299. int data = 0;
  300. #ifdef LIBXML_EXPR_ENABLED
  301. if (use_exp) {
  302. for (i = 1; i < argc ; i++) {
  303. if (strcmp(argv[i], "--") == 0)
  304. data = 1;
  305. else if ((argv[i][0] != '-') || (strcmp(argv[i], "-") == 0) ||
  306. (data == 1)) {
  307. if (pattern == NULL) {
  308. pattern = argv[i];
  309. printf("Testing expr %s:\n", pattern);
  310. expr = xmlExpParse(ctxt, pattern);
  311. if (expr == NULL) {
  312. printf(" failed to compile\n");
  313. break;
  314. }
  315. if (debug) {
  316. exprDebug(ctxt, expr);
  317. }
  318. } else {
  319. testReduce(ctxt, expr, argv[i]);
  320. }
  321. }
  322. }
  323. if (expr != NULL) {
  324. xmlExpFree(ctxt, expr);
  325. expr = NULL;
  326. }
  327. } else
  328. #endif
  329. {
  330. for (i = 1; i < argc ; i++) {
  331. if (strcmp(argv[i], "--") == 0)
  332. data = 1;
  333. else if ((argv[i][0] != '-') || (strcmp(argv[i], "-") == 0) ||
  334. (data == 1)) {
  335. if (pattern == NULL) {
  336. pattern = argv[i];
  337. printf("Testing %s:\n", pattern);
  338. comp = xmlRegexpCompile((const xmlChar *) pattern);
  339. if (comp == NULL) {
  340. printf(" failed to compile\n");
  341. break;
  342. }
  343. if (debug)
  344. xmlRegexpPrint(stdout, comp);
  345. } else {
  346. testRegexp(comp, argv[i]);
  347. }
  348. }
  349. }
  350. if (comp != NULL)
  351. xmlRegFreeRegexp(comp);
  352. }
  353. }
  354. #ifdef LIBXML_EXPR_ENABLED
  355. if (ctxt != NULL) {
  356. printf("Ops: %d nodes, %d cons\n",
  357. xmlExpCtxtNbNodes(ctxt), xmlExpCtxtNbCons(ctxt));
  358. xmlExpFreeCtxt(ctxt);
  359. }
  360. #endif
  361. xmlCleanupParser();
  362. xmlMemoryDump();
  363. return(0);
  364. }
  365. #else
  366. #include <stdio.h>
  /*
   * Fallback entry point used when LIBXML_REGEXP_ENABLED is not defined:
   * prints a notice and returns 0.  Both parameters are used, so neither is
   * marked ATTRIBUTE_UNUSED; a fixed name is printed when argv[0] is not
   * available, instead of passing NULL to %s.
   */
  int main(int argc, char **argv) {
      const char *name = "testRegexp";

      if ((argc > 0) && (argv[0] != NULL))
          name = argv[0];
      printf("%s : Regexp support not compiled in\n", name);
      return(0);
  }
  371. #endif /* LIBXML_REGEXP_ENABLED */