mce_extract.c 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. /**
  2. Copyright (c) 2010, Florian Reuter
  3. All rights reserved.
  4. Redistribution and use in source and binary forms, with or without
  5. modification, are permitted provided that the following conditions
  6. are met:
  7. * Redistributions of source code must retain the above copyright
  8. notice, this list of conditions and the following disclaimer.
  9. * Redistributions in binary form must reproduce the above copyright
  10. notice, this list of conditions and the following disclaimer in
  11. the documentation and/or other materials provided with the
  12. distribution.
  13. * Neither the name of Florian Reuter nor the names of its contributors
  14. may be used to endorse or promote products derived from this
  15. software without specific prior written permission.
  16. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  17. "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  18. LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  19. FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  20. COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  21. INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  22. BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  23. LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  24. CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  25. STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  26. ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
  27. OF THE POSSIBILITY OF SUCH DAMAGE.
  28. */
  29. /*
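  30. Dump an XML part using the MCE-aware text reader (pass --raw to disable MCE), or list all parts as JSON when no PARTNAME is given.
  31. Usage:
  32. mce_extract [--understands NAMESPACE] [--out FILE] [--indent] [--raw] FILENAME [PARTNAME]
  33. Sample:
  34. mce_extract OOXMLI1.docx "word/document.xml"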
  35. */
  36. #include <opc/opc.h>
  37. #include <stdio.h>
  38. #include <time.h>
  39. #include <libxml/xmlwriter.h>
  40. #ifdef WIN32
  41. #include <crtdbg.h>
  42. #endif
  /**
   * Write callback handed to xmlOutputBufferCreateIO().
   *
   * @param context the destination stream, passed as a FILE* (main supplies
   *                either the --out file or stdout).
   * @param buffer  bytes to write.
   * @param len     number of bytes in buffer.
   * @return the number of bytes written, or -1 on invalid arguments or when
   *         the stream reports a write error.
   */
  static int xmlOutputWrite(void * context, const char * buffer, int len) {
    FILE *out=(FILE*)context;
    if (NULL==out || len<0) {
      /* a negative length would otherwise be converted to a huge size_t */
      return -1;
    }
    if (0==len) {
      return 0;
    }
    if (NULL==buffer) {
      return -1;
    }
    size_t written=fwrite(buffer, sizeof(char), (size_t)len, out);
    if (written<(size_t)len && ferror(out)) {
      /* report the failure instead of a short (possibly zero) count */
      return -1;
    }
    return (int)written;
  }
  /**
   * Close callback handed to xmlOutputBufferCreateIO().
   *
   * Deliberately does nothing: the stream behind context is not closed here
   * (main calls fclose() on the --out file itself, and stdout stays open).
   *
   * @param context the stream passed to xmlOutputBufferCreateIO(); unused.
   * @return always 0.
   */
  static int xmlOutputClose(void * context) {
    (void)context; /* intentionally unused */
    return 0;
  }
  50. static void dumpPartsAsJSON(opcContainer *c, int indent) {
  51. printf("["); if (indent) printf("\n");
  52. opcPart part=OPC_PART_INVALID;
  53. opcPart next=OPC_PART_INVALID;
  54. for(part=opcPartGetFirst(c);OPC_PART_INVALID!=part;part=next) {
  55. next=opcPartGetNext(c, part);
  56. if (indent) {
  57. printf(" {\n \"name\": \"%s\",\n \"type\":\"%s\"\n }%s\n", part, opcPartGetType(c, part), (OPC_PART_INVALID==next?"":","));
  58. } else {
  59. printf("{\"name\": \"%s\", \"type\":\"%s\"}%s", part, opcPartGetType(c, part), (OPC_PART_INVALID==next?"":","));
  60. }
  61. }
  62. printf("]"); if (indent) printf("\n");
  63. }
  64. int main( int argc, const char* argv[] )
  65. {
  66. #ifdef WIN32
  67. _CrtSetDbgFlag (_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
  68. #endif
  69. int ret=-1;
  70. time_t start_time=time(NULL);
  71. FILE *file=NULL;
  72. const xmlChar *containerPath8=NULL;
  73. const xmlChar *partName8=NULL;
  74. xmlTextWriter *writer=NULL;
  75. int writer_indent=0;
  76. opc_bool_t reader_mce=OPC_TRUE;
  77. for(int i=1;i<argc;i++) {
  78. if ((0==xmlStrcmp(_X("--understands"), _X(argv[i])) || 0==xmlStrcmp(_X("-u"), _X(argv[i]))) && i+1<argc) {
  79. i++; // skip namespace, registered later when parser was created.
  80. } else if ((0==xmlStrcmp(_X("--out"), _X(argv[i])) || 0==xmlStrcmp(_X("--out"), _X(argv[i]))) && i+1<argc && NULL==file) {
  81. const char *filename=argv[++i];
  82. file=fopen(filename, "w");
  83. } else if (0==xmlStrcmp(_X("--indent"), _X(argv[i]))) {
  84. writer_indent=1;
  85. } else if (0==xmlStrcmp(_X("--raw"), _X(argv[i]))) {
  86. reader_mce=OPC_FALSE;
  87. } else if (NULL==containerPath8) {
  88. containerPath8=_X(argv[i]);
  89. } else if (NULL==partName8) {
  90. partName8=_X(argv[i]);
  91. } else {
  92. fprintf(stderr, "IGNORED: %s\n", argv[i]);
  93. }
  94. }
  95. if (NULL!=file) {
  96. xmlOutputBuffer *out=xmlOutputBufferCreateIO(xmlOutputWrite, xmlOutputClose, file, NULL);
  97. if (NULL!=out) {
  98. writer=xmlNewTextWriter(out);
  99. }
  100. } else {
  101. xmlOutputBuffer *out=xmlOutputBufferCreateIO(xmlOutputWrite, xmlOutputClose, stdout, NULL);
  102. if (NULL!=out) {
  103. writer=xmlNewTextWriter(out);
  104. }
  105. }
  106. if (NULL==containerPath8 || NULL==writer) {
  107. printf("mce_extract FILENAME.\n\n");
  108. printf("Sample: mce_extract test.docx word/document.xml\n");
  109. } else if (OPC_ERROR_NONE==opcInitLibrary()) {
  110. xmlTextWriterSetIndent(writer, writer_indent);
  111. opcContainer *c=NULL;
  112. if (NULL!=(c=opcContainerOpen(containerPath8, OPC_OPEN_READ_ONLY, NULL, NULL))) {
  113. if (NULL==partName8) {
  114. dumpPartsAsJSON(c, writer_indent);
  115. } else {
  116. opcPart part=OPC_PART_INVALID;
  117. if ((part=opcPartFind(c, partName8, NULL, 0))!=OPC_PART_INVALID) {
  118. mceTextReader_t reader;
  119. if (OPC_ERROR_NONE==opcXmlReaderOpen(c, &reader, part, NULL, NULL, 0)) {
  120. mceTextReaderDisableMCE(&reader, !reader_mce);
  121. for(int i=1;i<argc;i++) {
  122. if ((0==xmlStrcmp(_X("--understands"), _X(argv[i])) || 0==xmlStrcmp(_X("-u"), _X(argv[i]))) && i+1<argc) {
  123. const xmlChar *ns=_X(argv[++i]);
  124. mceTextReaderUnderstandsNamespace(&reader, ns);
  125. }
  126. }
  127. if (-1==mceTextReaderDump(&reader, writer, PTRUE)) {
  128. ret=mceTextReaderGetError(&reader);
  129. } else {
  130. ret=0;
  131. }
  132. mceTextReaderCleanup(&reader);
  133. } else {
  134. fprintf(stderr, "ERROR: part \"%s\" could not be opened for XML reading.\n", argv[2]);
  135. }
  136. } else {
  137. fprintf(stderr, "ERROR: part \"%s\" could not be opened in \"%s\".\n", argv[2], argv[1]);
  138. }
  139. }
  140. opcContainerClose(c, OPC_CLOSE_NOW);
  141. } else {
  142. fprintf(stderr, "ERROR: file \"%s\" could not be opened.\n", argv[1]);
  143. }
  144. opcFreeLibrary();
  145. } else {
  146. fprintf(stderr, "ERROR: initialization of libopc failed.\n");
  147. }
  148. if (NULL!=writer) xmlFreeTextWriter(writer);
  149. if (NULL!=file) fclose(file);
  150. time_t end_time=time(NULL);
  151. fprintf(stderr, "time %.2lfsec\n", difftime(end_time, start_time));
  152. return ret;
  153. }