opc_changes.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283
  1. #include <opc/opc.h>
  2. #include <stdio.h>
  3. #include <time.h>
  4. #ifdef WIN32
  5. #include <crtdbg.h>
  6. #endif
  7. typedef void (paragraph_callback_t)(void *callback_ctx, int level, xmlChar *modeTxt, xmlChar *parTxt);
  8. typedef struct CHANGE_MODE {
  9. opc_bool_t deleted;
  10. xmlChar *mode;
  11. } changemode_t;
  12. static void initMode(changemode_t *mode) {
  13. opc_bzero_mem(mode, sizeof(*mode));
  14. }
  15. static void cleanupMode(changemode_t *mode) {
  16. if (NULL!=mode->mode) {
  17. xmlFree(mode->mode);
  18. }
  19. opc_bzero_mem(mode, sizeof(*mode));
  20. }
  21. typedef struct PARSER_CONTEXT {
  22. xmlChar *modeTxt;
  23. opc_bool_t deleted;
  24. xmlChar *parTxt;
  25. void *callback_ctx;
  26. paragraph_callback_t *callback_fct;
  27. } context_t;
  28. static void flush(context_t *ctx, int level) {
  29. if (NULL!=ctx->callback_fct) ctx->callback_fct(ctx->callback_ctx, level, ctx->modeTxt, ctx->parTxt);
  30. if (NULL!=ctx->modeTxt) xmlFree(ctx->modeTxt); ctx->modeTxt=NULL;
  31. if (NULL!=ctx->parTxt) xmlFree(ctx->parTxt); ctx->parTxt=NULL;
  32. }
  33. static void cleanup(context_t *ctx) {
  34. if (NULL!=ctx->modeTxt) xmlFree(ctx->modeTxt); ctx->modeTxt=NULL;
  35. if (NULL!=ctx->parTxt) xmlFree(ctx->parTxt); ctx->parTxt=NULL;
  36. }
  37. static void text(context_t *ctx, const xmlChar *text, changemode_t *textMode) {
  38. if (NULL!=textMode) {
  39. ctx->modeTxt=xmlStrcat(ctx->modeTxt, textMode->mode);
  40. ctx->modeTxt=xmlStrcat(ctx->modeTxt, _X(": \""));
  41. ctx->modeTxt=xmlStrcat(ctx->modeTxt, text);
  42. ctx->modeTxt=xmlStrcat(ctx->modeTxt, _X("\"\n"));
  43. }
  44. if (NULL!=textMode && textMode->deleted) {
  45. if (!ctx->deleted) {
  46. ctx->parTxt=xmlStrcat(ctx->parTxt, _X("[]"));
  47. }
  48. ctx->deleted=OPC_TRUE;
  49. } else {
  50. ctx->parTxt=xmlStrcat(ctx->parTxt, text);
  51. ctx->deleted=OPC_FALSE;
  52. }
  53. }
  54. static void par(context_t *ctx, int level, changemode_t *parMode, changemode_t *cellMode, changemode_t *rowMode) {
  55. if (NULL!=rowMode && NULL!=rowMode->mode) {
  56. xmlChar *modeTxt=NULL;;
  57. modeTxt=xmlStrcat(modeTxt, rowMode->mode);
  58. modeTxt=xmlStrcat(modeTxt, _X(": row mark\n"));
  59. ctx->modeTxt=xmlStrcat(ctx->modeTxt, modeTxt);
  60. xmlFree(modeTxt);
  61. }
  62. if (NULL!=parMode && NULL!=parMode->mode) {
  63. xmlChar *modeTxt=NULL;;
  64. modeTxt=xmlStrcat(modeTxt, parMode->mode);
  65. modeTxt=xmlStrcat(modeTxt, _X(": paragraph mark\n"));
  66. ctx->modeTxt=xmlStrcat(ctx->modeTxt, modeTxt);
  67. xmlFree(modeTxt);
  68. }
  69. if (NULL!=parMode && parMode->deleted) {
  70. if (!ctx->deleted) {
  71. ctx->parTxt=xmlStrcat(ctx->parTxt, _X("[]"));
  72. }
  73. ctx->deleted=OPC_TRUE;
  74. } else {
  75. ctx->parTxt=xmlStrcat(ctx->parTxt, _X("\n"));
  76. ctx->deleted=OPC_FALSE;
  77. flush(ctx, level);
  78. }
  79. }
  80. static char ns_w[]="http://schemas.openxmlformats.org/wordprocessingml/2006/main";
  81. static void dumpText(context_t *ctx, mceTextReader_t *reader, int level, changemode_t *textMode, changemode_t *parMode, changemode_t *cellMode, changemode_t *rowMode, changemode_t *prop_mode);
  82. static void dumpChildren(context_t *ctx, mceTextReader_t *reader, int level, changemode_t *textMode, changemode_t *parMode, changemode_t *cellMode, changemode_t *rowMode, changemode_t *prop_mode) {
  83. mce_start_children(reader) {
  84. mce_match_element(reader, NULL, NULL) {
  85. dumpText(ctx, reader, level, textMode, parMode, cellMode, rowMode, prop_mode);
  86. }
  87. mce_match_text(reader) {
  88. dumpText(ctx, reader, level, textMode, parMode, cellMode, rowMode, prop_mode);
  89. }
  90. } mce_end_children(reader);
  91. }
  92. static void dumpText(context_t *ctx, mceTextReader_t *reader, int level, changemode_t *textMode, changemode_t *parMode, changemode_t *cellMode, changemode_t *rowMode, changemode_t *prop_mode) {
  93. mce_start_choice(reader) {
  94. mce_start_element(reader, _X(ns_w), _X("t")) {
  95. mce_skip_attributes(reader);
  96. mce_start_children(reader) {
  97. mce_start_text(reader) {
  98. text(ctx, xmlTextReaderConstValue(reader->reader), textMode);
  99. } mce_end_text(reader);
  100. } mce_end_children(reader);
  101. } mce_end_element(reader);
  102. mce_start_element(reader, _X(ns_w), _X("delText")) {
  103. mce_skip_attributes(reader);
  104. mce_start_children(reader) {
  105. mce_start_text(reader) {
  106. OPC_ASSERT(NULL!=textMode && textMode->deleted);
  107. text(ctx, xmlTextReaderConstValue(reader->reader), textMode);
  108. } mce_end_text(reader);
  109. } mce_end_children(reader);
  110. } mce_end_element(reader);
  111. mce_start_element(reader, _X(ns_w), _X("ins")) {
  112. changemode_t ins_props;
  113. initMode(&ins_props);
  114. ins_props.deleted=0;
  115. ins_props.mode=xmlStrdup(_X("Insertion by "));
  116. mce_start_attributes(reader) {
  117. mce_start_attribute(reader, _X(ns_w), _X("author")) {
  118. ins_props.mode=xmlStrcat(ins_props.mode, xmlTextReaderConstValue(reader->reader));
  119. } mce_end_attribute(reader);
  120. } mce_end_attributes(reader);
  121. if (NULL!=prop_mode) {
  122. prop_mode->deleted=ins_props.deleted;
  123. prop_mode->mode=xmlStrdup(ins_props.mode);
  124. }
  125. dumpChildren(ctx, reader, level, &ins_props, parMode, cellMode, rowMode, prop_mode);
  126. cleanupMode(&ins_props);
  127. } mce_end_element(reader);
  128. mce_start_element(reader, _X(ns_w), _X("moveTo")) {
  129. changemode_t ins_props;
  130. initMode(&ins_props);
  131. ins_props.deleted=0;
  132. ins_props.mode=xmlStrdup(_X("Insertion by "));
  133. mce_start_attributes(reader) {
  134. mce_start_attribute(reader, _X(ns_w), _X("author")) {
  135. ins_props.mode=xmlStrcat(ins_props.mode, xmlTextReaderConstValue(reader->reader));
  136. } mce_end_attribute(reader);
  137. } mce_end_attributes(reader);
  138. if (NULL!=prop_mode) {
  139. prop_mode->deleted=ins_props.deleted;
  140. prop_mode->mode=xmlStrdup(ins_props.mode);
  141. }
  142. dumpChildren(ctx, reader, level, &ins_props, parMode, cellMode, rowMode, prop_mode);
  143. cleanupMode(&ins_props);
  144. } mce_end_element(reader);
  145. mce_start_element(reader, _X(ns_w), _X("del")) {
  146. changemode_t del_props;
  147. initMode(&del_props);
  148. del_props.deleted=1;
  149. del_props.mode=xmlStrdup(_X("Deletion by "));
  150. mce_start_attributes(reader) {
  151. mce_start_attribute(reader, _X(ns_w), _X("author")) {
  152. del_props.mode=xmlStrcat(del_props.mode, xmlTextReaderConstValue(reader->reader));
  153. } mce_end_attribute(reader);
  154. } mce_end_attributes(reader);
  155. if (NULL!=prop_mode) {
  156. prop_mode->deleted=del_props.deleted;
  157. prop_mode->mode=xmlStrdup(del_props.mode);
  158. }
  159. dumpChildren(ctx, reader, level, &del_props, parMode, cellMode, rowMode, prop_mode);
  160. cleanupMode(&del_props);
  161. } mce_end_element(reader);
  162. mce_start_element(reader, _X(ns_w), _X("moveFrom")) {
  163. changemode_t del_props;
  164. initMode(&del_props);
  165. del_props.deleted=1;
  166. del_props.mode=xmlStrdup(_X("Deletion by "));
  167. mce_start_attributes(reader) {
  168. mce_start_attribute(reader, _X(ns_w), _X("author")) {
  169. del_props.mode=xmlStrcat(del_props.mode, xmlTextReaderConstValue(reader->reader));
  170. } mce_end_attribute(reader);
  171. } mce_end_attributes(reader);
  172. if (NULL!=prop_mode) {
  173. prop_mode->deleted=del_props.deleted;
  174. prop_mode->mode=xmlStrdup(del_props.mode);
  175. }
  176. dumpChildren(ctx, reader, level, &del_props, parMode, cellMode, rowMode, prop_mode);
  177. cleanupMode(&del_props);
  178. } mce_end_element(reader);
  179. mce_start_element(reader, _X(ns_w), _X("p")) {
  180. changemode_t p_props;
  181. initMode(&p_props);
  182. mce_skip_attributes(reader);
  183. mce_start_children(reader) {
  184. mce_match_element(reader, _X(ns_w), _X("pPr")) {
  185. dumpText(ctx, reader, level, textMode, parMode, cellMode, rowMode, &p_props);
  186. };
  187. mce_match_element(reader, NULL, NULL) {
  188. dumpText(ctx, reader, level, textMode, &p_props, cellMode, rowMode, NULL);
  189. };
  190. } mce_end_children(reader);
  191. par(ctx, level, &p_props, cellMode, rowMode);
  192. cleanupMode(&p_props);
  193. } mce_end_element(reader);
  194. mce_start_element(reader, _X(ns_w), _X("tr")) {
  195. changemode_t tr_props;
  196. initMode(&tr_props);
  197. mce_skip_attributes(reader);
  198. mce_start_children(reader) {
  199. mce_match_element(reader, _X(ns_w), _X("trPr")) {
  200. dumpText(ctx, reader, level+1, textMode, parMode, cellMode, rowMode, &tr_props);
  201. };
  202. mce_match_element(reader, NULL, NULL) {
  203. dumpText(ctx, reader, level+1, textMode, parMode, cellMode, &tr_props, NULL);
  204. };
  205. } mce_end_children(reader);
  206. cleanupMode(&tr_props);
  207. } mce_end_element(reader);
  208. mce_start_element(reader, NULL, NULL) {
  209. mce_skip_attributes(reader);
  210. dumpChildren(ctx, reader, level, textMode, parMode, cellMode, rowMode, prop_mode);
  211. } mce_end_element(reader);
  212. mce_start_text(reader) {
  213. } mce_end_text(reader);
  214. } mce_end_choice(reader);
  215. }
  216. void parseText(xmlChar *filename, paragraph_callback_t *callback_fct, void *callback_ctx) {
  217. opcContainer *c=opcContainerOpen(filename, OPC_OPEN_READ_ONLY, NULL, NULL);
  218. if (NULL!=c) {
  219. opcRelation rel=opcRelationFind(c, OPC_PART_INVALID, NULL, _X("http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument"));
  220. if (OPC_RELATION_INVALID!=rel) {
  221. opcPart main=opcRelationGetInternalTarget(c, OPC_PART_INVALID, rel);
  222. if (OPC_PART_INVALID!=main) {
  223. const xmlChar *type=opcPartGetType(c, main);
  224. if (0==xmlStrcmp(type, _X("application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"))) {
  225. mceTextReader_t reader;
  226. if (OPC_ERROR_NONE==opcXmlReaderOpen(c, &reader, main, NULL, 0, 0)) {
  227. context_t ctx;
  228. opc_bzero_mem(&ctx, sizeof(ctx));
  229. ctx.callback_fct=callback_fct;
  230. ctx.callback_ctx=callback_ctx;
  231. mce_start_document(&reader) {
  232. mce_match_element(&reader, NULL, NULL) {
  233. dumpText(&ctx, &reader, 0, NULL, NULL, NULL, NULL, NULL);
  234. };
  235. } mce_end_document(&reader);
  236. flush(&ctx, 0);
  237. cleanup(&ctx);
  238. }
  239. mceTextReaderCleanup(&reader);
  240. }
  241. }
  242. }
  243. opcContainerClose(c, OPC_CLOSE_NOW);
  244. }
  245. }
  246. static void paragraph_callback(void *callback_ctx, int level, xmlChar *modeTxt, xmlChar *parTxt) {
  247. if (NULL!=modeTxt) {
  248. fputs((const char *)modeTxt, (FILE*)callback_ctx);
  249. }
  250. if (NULL!=parTxt) {
  251. fputs((const char *)parTxt, (FILE*)callback_ctx);
  252. }
  253. }
  254. int main( int argc, const char* argv[] )
  255. {
  256. #ifdef WIN32
  257. _CrtSetDbgFlag (_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
  258. #endif
  259. opcInitLibrary();
  260. parseText(_X(argv[1]), paragraph_callback, stdout);
  261. opcFreeLibrary();
  262. #ifdef WIN32
  263. OPC_ASSERT(!_CrtDumpMemoryLeaks());
  264. #endif
  265. return 0;
  266. }