uri.c 66 KB


  1. /**
  2. * uri.c: set of generic URI related routines
  3. *
  4. * Reference: RFCs 3986, 2732 and 2373
  5. *
  6. * See Copyright for the status of this software.
  7. *
  8. * daniel@veillard.com
  9. */
  #define IN_LIBXML
  #include "libxml.h"
  #include <limits.h>
  #include <string.h>
  #include <libxml/xmlmemory.h>
  #include <libxml/uri.h>
  #include <libxml/globals.h>
  #include <libxml/xmlerror.h>
  17. static void xmlCleanURI(xmlURIPtr uri);
  /*
   * Old rule from 2396 used in legacy handling code
   * alpha    = lowalpha | upalpha
   */
  #define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

  /*
   * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
   *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
   *            "u" | "v" | "w" | "x" | "y" | "z"
   */
  #define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

  /*
   * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
   *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
   *           "U" | "V" | "W" | "X" | "Y" | "Z"
   */
  #define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

  #ifdef IS_DIGIT
  #undef IS_DIGIT
  #endif
  /*
   * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
   */
  #define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

  /*
   * alphanum = alpha | digit
   */
  #define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

  /*
   * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
   */
  #define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
      ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
      ((x) == '(') || ((x) == ')'))

  /*
   * unwise = "{" | "}" | "|" | "\" | "^" | "`"
   *
   * Unlike the other legacy classifiers this one takes a POINTER to the
   * character.  The macro also accepts "[" and "]" in addition to the
   * characters listed in the production above.
   */
  #define IS_UNWISE(p)                                                    \
      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||           \
       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||          \
       ((*(p) == ']')) || ((*(p) == '`')))

  /*
   * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
   *            "[" | "]"
   */
  #define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
      ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') ||     \
      ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') ||     \
      ((x) == ']'))

  /*
   * unreserved = alphanum | mark
   */
  #define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

  /*
   * Skip to next pointer char, handle escaped sequences.
   *
   * @p must be a modifiable pointer lvalue.  When it points at '%' it is
   * advanced by 3 (the "%XX" triplet), otherwise by 1.  The macro does not
   * itself check the two characters following the '%'; callers are expected
   * to have validated the escape before stepping over it.
   * The argument is fully parenthesized so that expressions such as
   * NEXT(*pp) advance the pointed-to pointer rather than pp itself.
   */
  #define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))

  /*
   * Productions from the spec.
   *
   *    authority     = server | reg_name
   *    reg_name      = 1*( unreserved | escaped | "$" | "," |
   *                        ";" | ":" | "@" | "&" | "=" | "+" )
   *
   * path          = [ abs_path | opaque_part ]
   */

  /* Duplicate the first @n bytes of @s through xmlStrndup(), as a char *. */
  #define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n))
  /************************************************************************
   *                                                                      *
   *                         RFC 3986 parser                              *
   *                                                                      *
   ************************************************************************/

  /*
   * All ISA_xxx classifiers take a POINTER to the character to test.
   */
  #define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
  #define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||               \
                        ((*(p) >= 'A') && (*(p) <= 'Z')))
  #define ISA_HEXDIG(p)                                                   \
         (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||             \
          ((*(p) >= 'A') && (*(p) <= 'F')))

  /*
   *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
   *                     / "*" / "+" / "," / ";" / "="
   *
   * The apostrophe is part of the production and must be accepted.
   */
  #define ISA_SUB_DELIM(p)                                                \
        (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||         \
         ((*(p) == '\'')) || ((*(p) == '(')) || ((*(p) == ')')) ||        \
         ((*(p) == '*')) || ((*(p) == '+')) || ((*(p) == ',')) ||         \
         ((*(p) == ';')) || ((*(p) == '=')))

  /*
   *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
   */
  #define ISA_GEN_DELIM(p)                                                \
        (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
         ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
         ((*(p) == '@')))

  /*
   *    reserved      = gen-delims / sub-delims
   */
  #define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

  /*
   *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
   */
  #define ISA_UNRESERVED(p)                                               \
        ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||           \
         ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

  /*
   *    pct-encoded   = "%" HEXDIG HEXDIG
   *
   * The && chain short-circuits, so the characters after '%' are only
   * read once the previous one has been accepted.
   */
  #define ISA_PCT_ENCODED(p)                                              \
       ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

  /*
   *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
   */
  #define ISA_PCHAR(p)                                                    \
       (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||    \
        ((*(p) == ':')) || ((*(p) == '@')))
  133. /**
  134. * xmlParse3986Scheme:
  135. * @uri: pointer to an URI structure
  136. * @str: pointer to the string to analyze
  137. *
  138. * Parse an URI scheme
  139. *
  140. * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
  141. *
  142. * Returns 0 or the error code
  143. */
  144. static int
  145. xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
  146. const char *cur;
  147. if (str == NULL)
  148. return(-1);
  149. cur = *str;
  150. if (!ISA_ALPHA(cur))
  151. return(2);
  152. cur++;
  153. while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
  154. (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
  155. if (uri != NULL) {
  156. if (uri->scheme != NULL) xmlFree(uri->scheme);
  157. uri->scheme = STRNDUP(*str, cur - *str);
  158. }
  159. *str = cur;
  160. return(0);
  161. }
  162. /**
  163. * xmlParse3986Fragment:
  164. * @uri: pointer to an URI structure
  165. * @str: pointer to the string to analyze
  166. *
  167. * Parse the query part of an URI
  168. *
  169. * fragment = *( pchar / "/" / "?" )
  170. * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
  171. * in the fragment identifier but this is used very broadly for
  172. * xpointer scheme selection, so we are allowing it here to not break
  173. * for example all the DocBook processing chains.
  174. *
  175. * Returns 0 or the error code
  176. */
  177. static int
  178. xmlParse3986Fragment(xmlURIPtr uri, const char **str)
  179. {
  180. const char *cur;
  181. if (str == NULL)
  182. return (-1);
  183. cur = *str;
  184. while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
  185. (*cur == '[') || (*cur == ']') ||
  186. ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
  187. NEXT(cur);
  188. if (uri != NULL) {
  189. if (uri->fragment != NULL)
  190. xmlFree(uri->fragment);
  191. if (uri->cleanup & 2)
  192. uri->fragment = STRNDUP(*str, cur - *str);
  193. else
  194. uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
  195. }
  196. *str = cur;
  197. return (0);
  198. }
  199. /**
  200. * xmlParse3986Query:
  201. * @uri: pointer to an URI structure
  202. * @str: pointer to the string to analyze
  203. *
  204. * Parse the query part of an URI
  205. *
  206. * query = *uric
  207. *
  208. * Returns 0 or the error code
  209. */
  210. static int
  211. xmlParse3986Query(xmlURIPtr uri, const char **str)
  212. {
  213. const char *cur;
  214. if (str == NULL)
  215. return (-1);
  216. cur = *str;
  217. while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
  218. ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
  219. NEXT(cur);
  220. if (uri != NULL) {
  221. if (uri->query != NULL)
  222. xmlFree(uri->query);
  223. if (uri->cleanup & 2)
  224. uri->query = STRNDUP(*str, cur - *str);
  225. else
  226. uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
  227. /* Save the raw bytes of the query as well.
  228. * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
  229. */
  230. if (uri->query_raw != NULL)
  231. xmlFree (uri->query_raw);
  232. uri->query_raw = STRNDUP (*str, cur - *str);
  233. }
  234. *str = cur;
  235. return (0);
  236. }
  237. /**
  238. * xmlParse3986Port:
  239. * @uri: pointer to an URI structure
  240. * @str: the string to analyze
  241. *
  242. * Parse a port part and fills in the appropriate fields
  243. * of the @uri structure
  244. *
  245. * port = *DIGIT
  246. *
  247. * Returns 0 or the error code
  248. */
  249. static int
  250. xmlParse3986Port(xmlURIPtr uri, const char **str)
  251. {
  252. const char *cur = *str;
  253. if (ISA_DIGIT(cur)) {
  254. if (uri != NULL)
  255. uri->port = 0;
  256. while (ISA_DIGIT(cur)) {
  257. if (uri != NULL)
  258. uri->port = uri->port * 10 + (*cur - '0');
  259. cur++;
  260. }
  261. *str = cur;
  262. return(0);
  263. }
  264. return(1);
  265. }
  266. /**
  267. * xmlParse3986Userinfo:
  268. * @uri: pointer to an URI structure
  269. * @str: the string to analyze
  270. *
  271. * Parse an user informations part and fills in the appropriate fields
  272. * of the @uri structure
  273. *
  274. * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
  275. *
  276. * Returns 0 or the error code
  277. */
  278. static int
  279. xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
  280. {
  281. const char *cur;
  282. cur = *str;
  283. while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
  284. ISA_SUB_DELIM(cur) || (*cur == ':'))
  285. NEXT(cur);
  286. if (*cur == '@') {
  287. if (uri != NULL) {
  288. if (uri->user != NULL) xmlFree(uri->user);
  289. if (uri->cleanup & 2)
  290. uri->user = STRNDUP(*str, cur - *str);
  291. else
  292. uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
  293. }
  294. *str = cur;
  295. return(0);
  296. }
  297. return(1);
  298. }
  /**
   * xmlParse3986DecOctet:
   * @str:  the string to analyze
   *
   *    dec-octet     = DIGIT                 ; 0-9
   *                  / %x31-39 DIGIT         ; 10-99
   *                  / "1" 2DIGIT            ; 100-199
   *                  / "2" %x30-34 DIGIT     ; 200-249
   *                  / "25" %x30-35          ; 250-255
   *
   * Skip a dec-octet.  *@str is only updated when an octet was recognized.
   *
   * Returns 0 if found and skipped, 1 otherwise
   */
  static int
  xmlParse3986DecOctet(const char **str) {
      const char *cur = *str;

      if (!(ISA_DIGIT(cur)))
          return(1);
      if (!ISA_DIGIT(cur+1))
          cur++;                                  /* 0-9 */
      else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2)))
          cur += 2;                               /* 10-99 */
      else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2)))
          cur += 3;                               /* 100-199 */
      else if ((*cur == '2') && (*(cur + 1) >= '0') &&
               (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2)))
          cur += 3;                               /* 200-249 */
      else if ((*cur == '2') && (*(cur + 1) == '5') &&
               (*(cur + 2) >= '0') && (*(cur + 2) <= '5'))
          cur += 3;                               /* 250-255: bound the THIRD digit */
      else
          return(1);
      *str = cur;
      return(0);
  }
  335. /**
  336. * xmlParse3986Host:
  337. * @uri: pointer to an URI structure
  338. * @str: the string to analyze
  339. *
  340. * Parse an host part and fills in the appropriate fields
  341. * of the @uri structure
  342. *
  343. * host = IP-literal / IPv4address / reg-name
  344. * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
  345. * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
  346. * reg-name = *( unreserved / pct-encoded / sub-delims )
  347. *
  348. * Returns 0 or the error code
  349. */
  350. static int
  351. xmlParse3986Host(xmlURIPtr uri, const char **str)
  352. {
  353. const char *cur = *str;
  354. const char *host;
  355. host = cur;
  356. /*
  357. * IPv6 and future adressing scheme are enclosed between brackets
  358. */
  359. if (*cur == '[') {
  360. cur++;
  361. while ((*cur != ']') && (*cur != 0))
  362. cur++;
  363. if (*cur != ']')
  364. return(1);
  365. cur++;
  366. goto found;
  367. }
  368. /*
  369. * try to parse an IPv4
  370. */
  371. if (ISA_DIGIT(cur)) {
  372. if (xmlParse3986DecOctet(&cur) != 0)
  373. goto not_ipv4;
  374. if (*cur != '.')
  375. goto not_ipv4;
  376. cur++;
  377. if (xmlParse3986DecOctet(&cur) != 0)
  378. goto not_ipv4;
  379. if (*cur != '.')
  380. goto not_ipv4;
  381. if (xmlParse3986DecOctet(&cur) != 0)
  382. goto not_ipv4;
  383. if (*cur != '.')
  384. goto not_ipv4;
  385. if (xmlParse3986DecOctet(&cur) != 0)
  386. goto not_ipv4;
  387. goto found;
  388. not_ipv4:
  389. cur = *str;
  390. }
  391. /*
  392. * then this should be a hostname which can be empty
  393. */
  394. while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
  395. NEXT(cur);
  396. found:
  397. if (uri != NULL) {
  398. if (uri->authority != NULL) xmlFree(uri->authority);
  399. uri->authority = NULL;
  400. if (uri->server != NULL) xmlFree(uri->server);
  401. if (cur != host) {
  402. if (uri->cleanup & 2)
  403. uri->server = STRNDUP(host, cur - host);
  404. else
  405. uri->server = xmlURIUnescapeString(host, cur - host, NULL);
  406. } else
  407. uri->server = NULL;
  408. }
  409. *str = cur;
  410. return(0);
  411. }
  412. /**
  413. * xmlParse3986Authority:
  414. * @uri: pointer to an URI structure
  415. * @str: the string to analyze
  416. *
  417. * Parse an authority part and fills in the appropriate fields
  418. * of the @uri structure
  419. *
  420. * authority = [ userinfo "@" ] host [ ":" port ]
  421. *
  422. * Returns 0 or the error code
  423. */
  424. static int
  425. xmlParse3986Authority(xmlURIPtr uri, const char **str)
  426. {
  427. const char *cur;
  428. int ret;
  429. cur = *str;
  430. /*
  431. * try to parse an userinfo and check for the trailing @
  432. */
  433. ret = xmlParse3986Userinfo(uri, &cur);
  434. if ((ret != 0) || (*cur != '@'))
  435. cur = *str;
  436. else
  437. cur++;
  438. ret = xmlParse3986Host(uri, &cur);
  439. if (ret != 0) return(ret);
  440. if (*cur == ':') {
  441. cur++;
  442. ret = xmlParse3986Port(uri, &cur);
  443. if (ret != 0) return(ret);
  444. }
  445. *str = cur;
  446. return(0);
  447. }
  /**
   * xmlParse3986Segment:
   * @str:  the string to analyze
   * @forbid: an optional forbidden character (0 for none)
   * @empty: allow an empty segment
   *
   * Skip over one path segment; the scan stops at the first character
   * which is not a pchar or which equals @forbid.
   *
   * segment       = *pchar
   * segment-nz    = 1*pchar
   * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
   *               ; non-zero-length segment without any colon ":"
   *
   * Returns 0 on success, 1 if the first character is not a pchar and
   *         @empty is zero
   */
  static int
  xmlParse3986Segment(const char **str, char forbid, int empty)
  {
      const char *pos = *str;

      if (!ISA_PCHAR(pos))
          return(empty ? 0 : 1);

      for (;;) {
          if (!ISA_PCHAR(pos))
              break;
          if (*pos == forbid)
              break;
          NEXT(pos);
      }
      *str = pos;
      return (0);
  }
  479. /**
  480. * xmlParse3986PathAbEmpty:
  481. * @uri: pointer to an URI structure
  482. * @str: the string to analyze
  483. *
  484. * Parse an path absolute or empty and fills in the appropriate fields
  485. * of the @uri structure
  486. *
  487. * path-abempty = *( "/" segment )
  488. *
  489. * Returns 0 or the error code
  490. */
  491. static int
  492. xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
  493. {
  494. const char *cur;
  495. int ret;
  496. cur = *str;
  497. while (*cur == '/') {
  498. cur++;
  499. ret = xmlParse3986Segment(&cur, 0, 1);
  500. if (ret != 0) return(ret);
  501. }
  502. if (uri != NULL) {
  503. if (uri->path != NULL) xmlFree(uri->path);
  504. if (*str != cur) {
  505. if (uri->cleanup & 2)
  506. uri->path = STRNDUP(*str, cur - *str);
  507. else
  508. uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
  509. } else {
  510. uri->path = NULL;
  511. }
  512. }
  513. *str = cur;
  514. return (0);
  515. }
  516. /**
  517. * xmlParse3986PathAbsolute:
  518. * @uri: pointer to an URI structure
  519. * @str: the string to analyze
  520. *
  521. * Parse an path absolute and fills in the appropriate fields
  522. * of the @uri structure
  523. *
  524. * path-absolute = "/" [ segment-nz *( "/" segment ) ]
  525. *
  526. * Returns 0 or the error code
  527. */
  528. static int
  529. xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
  530. {
  531. const char *cur;
  532. int ret;
  533. cur = *str;
  534. if (*cur != '/')
  535. return(1);
  536. cur++;
  537. ret = xmlParse3986Segment(&cur, 0, 0);
  538. if (ret == 0) {
  539. while (*cur == '/') {
  540. cur++;
  541. ret = xmlParse3986Segment(&cur, 0, 1);
  542. if (ret != 0) return(ret);
  543. }
  544. }
  545. if (uri != NULL) {
  546. if (uri->path != NULL) xmlFree(uri->path);
  547. if (cur != *str) {
  548. if (uri->cleanup & 2)
  549. uri->path = STRNDUP(*str, cur - *str);
  550. else
  551. uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
  552. } else {
  553. uri->path = NULL;
  554. }
  555. }
  556. *str = cur;
  557. return (0);
  558. }
  559. /**
  560. * xmlParse3986PathRootless:
  561. * @uri: pointer to an URI structure
  562. * @str: the string to analyze
  563. *
  564. * Parse an path without root and fills in the appropriate fields
  565. * of the @uri structure
  566. *
  567. * path-rootless = segment-nz *( "/" segment )
  568. *
  569. * Returns 0 or the error code
  570. */
  571. static int
  572. xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
  573. {
  574. const char *cur;
  575. int ret;
  576. cur = *str;
  577. ret = xmlParse3986Segment(&cur, 0, 0);
  578. if (ret != 0) return(ret);
  579. while (*cur == '/') {
  580. cur++;
  581. ret = xmlParse3986Segment(&cur, 0, 1);
  582. if (ret != 0) return(ret);
  583. }
  584. if (uri != NULL) {
  585. if (uri->path != NULL) xmlFree(uri->path);
  586. if (cur != *str) {
  587. if (uri->cleanup & 2)
  588. uri->path = STRNDUP(*str, cur - *str);
  589. else
  590. uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
  591. } else {
  592. uri->path = NULL;
  593. }
  594. }
  595. *str = cur;
  596. return (0);
  597. }
  598. /**
  599. * xmlParse3986PathNoScheme:
  600. * @uri: pointer to an URI structure
  601. * @str: the string to analyze
  602. *
  603. * Parse an path which is not a scheme and fills in the appropriate fields
  604. * of the @uri structure
  605. *
  606. * path-noscheme = segment-nz-nc *( "/" segment )
  607. *
  608. * Returns 0 or the error code
  609. */
  610. static int
  611. xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
  612. {
  613. const char *cur;
  614. int ret;
  615. cur = *str;
  616. ret = xmlParse3986Segment(&cur, ':', 0);
  617. if (ret != 0) return(ret);
  618. while (*cur == '/') {
  619. cur++;
  620. ret = xmlParse3986Segment(&cur, 0, 1);
  621. if (ret != 0) return(ret);
  622. }
  623. if (uri != NULL) {
  624. if (uri->path != NULL) xmlFree(uri->path);
  625. if (cur != *str) {
  626. if (uri->cleanup & 2)
  627. uri->path = STRNDUP(*str, cur - *str);
  628. else
  629. uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
  630. } else {
  631. uri->path = NULL;
  632. }
  633. }
  634. *str = cur;
  635. return (0);
  636. }
  637. /**
  638. * xmlParse3986HierPart:
  639. * @uri: pointer to an URI structure
  640. * @str: the string to analyze
  641. *
  642. * Parse an hierarchical part and fills in the appropriate fields
  643. * of the @uri structure
  644. *
  645. * hier-part = "//" authority path-abempty
  646. * / path-absolute
  647. * / path-rootless
  648. * / path-empty
  649. *
  650. * Returns 0 or the error code
  651. */
  652. static int
  653. xmlParse3986HierPart(xmlURIPtr uri, const char **str)
  654. {
  655. const char *cur;
  656. int ret;
  657. cur = *str;
  658. if ((*cur == '/') && (*(cur + 1) == '/')) {
  659. cur += 2;
  660. ret = xmlParse3986Authority(uri, &cur);
  661. if (ret != 0) return(ret);
  662. ret = xmlParse3986PathAbEmpty(uri, &cur);
  663. if (ret != 0) return(ret);
  664. *str = cur;
  665. return(0);
  666. } else if (*cur == '/') {
  667. ret = xmlParse3986PathAbsolute(uri, &cur);
  668. if (ret != 0) return(ret);
  669. } else if (ISA_PCHAR(cur)) {
  670. ret = xmlParse3986PathRootless(uri, &cur);
  671. if (ret != 0) return(ret);
  672. } else {
  673. /* path-empty is effectively empty */
  674. if (uri != NULL) {
  675. if (uri->path != NULL) xmlFree(uri->path);
  676. uri->path = NULL;
  677. }
  678. }
  679. *str = cur;
  680. return (0);
  681. }
  682. /**
  683. * xmlParse3986RelativeRef:
  684. * @uri: pointer to an URI structure
  685. * @str: the string to analyze
  686. *
  687. * Parse an URI string and fills in the appropriate fields
  688. * of the @uri structure
  689. *
  690. * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
  691. * relative-part = "//" authority path-abempty
  692. * / path-absolute
  693. * / path-noscheme
  694. * / path-empty
  695. *
  696. * Returns 0 or the error code
  697. */
  698. static int
  699. xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
  700. int ret;
  701. if ((*str == '/') && (*(str + 1) == '/')) {
  702. str += 2;
  703. ret = xmlParse3986Authority(uri, &str);
  704. if (ret != 0) return(ret);
  705. ret = xmlParse3986PathAbEmpty(uri, &str);
  706. if (ret != 0) return(ret);
  707. } else if (*str == '/') {
  708. ret = xmlParse3986PathAbsolute(uri, &str);
  709. if (ret != 0) return(ret);
  710. } else if (ISA_PCHAR(str)) {
  711. ret = xmlParse3986PathNoScheme(uri, &str);
  712. if (ret != 0) return(ret);
  713. } else {
  714. /* path-empty is effectively empty */
  715. if (uri != NULL) {
  716. if (uri->path != NULL) xmlFree(uri->path);
  717. uri->path = NULL;
  718. }
  719. }
  720. if (*str == '?') {
  721. str++;
  722. ret = xmlParse3986Query(uri, &str);
  723. if (ret != 0) return(ret);
  724. }
  725. if (*str == '#') {
  726. str++;
  727. ret = xmlParse3986Fragment(uri, &str);
  728. if (ret != 0) return(ret);
  729. }
  730. if (*str != 0) {
  731. xmlCleanURI(uri);
  732. return(1);
  733. }
  734. return(0);
  735. }
  736. /**
  737. * xmlParse3986URI:
  738. * @uri: pointer to an URI structure
  739. * @str: the string to analyze
  740. *
  741. * Parse an URI string and fills in the appropriate fields
  742. * of the @uri structure
  743. *
  744. * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
  745. *
  746. * Returns 0 or the error code
  747. */
  748. static int
  749. xmlParse3986URI(xmlURIPtr uri, const char *str) {
  750. int ret;
  751. ret = xmlParse3986Scheme(uri, &str);
  752. if (ret != 0) return(ret);
  753. if (*str != ':') {
  754. return(1);
  755. }
  756. str++;
  757. ret = xmlParse3986HierPart(uri, &str);
  758. if (ret != 0) return(ret);
  759. if (*str == '?') {
  760. str++;
  761. ret = xmlParse3986Query(uri, &str);
  762. if (ret != 0) return(ret);
  763. }
  764. if (*str == '#') {
  765. str++;
  766. ret = xmlParse3986Fragment(uri, &str);
  767. if (ret != 0) return(ret);
  768. }
  769. if (*str != 0) {
  770. xmlCleanURI(uri);
  771. return(1);
  772. }
  773. return(0);
  774. }
  775. /**
  776. * xmlParse3986URIReference:
  777. * @uri: pointer to an URI structure
  778. * @str: the string to analyze
  779. *
  780. * Parse an URI reference string and fills in the appropriate fields
  781. * of the @uri structure
  782. *
  783. * URI-reference = URI / relative-ref
  784. *
  785. * Returns 0 or the error code
  786. */
  787. static int
  788. xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
  789. int ret;
  790. if (str == NULL)
  791. return(-1);
  792. xmlCleanURI(uri);
  793. /*
  794. * Try first to parse absolute refs, then fallback to relative if
  795. * it fails.
  796. */
  797. ret = xmlParse3986URI(uri, str);
  798. if (ret != 0) {
  799. xmlCleanURI(uri);
  800. ret = xmlParse3986RelativeRef(uri, str);
  801. if (ret != 0) {
  802. xmlCleanURI(uri);
  803. return(ret);
  804. }
  805. }
  806. return(0);
  807. }
  808. /**
  809. * xmlParseURI:
  810. * @str: the URI string to analyze
  811. *
  812. * Parse an URI based on RFC 3986
  813. *
  814. * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
  815. *
  816. * Returns a newly built xmlURIPtr or NULL in case of error
  817. */
  818. xmlURIPtr
  819. xmlParseURI(const char *str) {
  820. xmlURIPtr uri;
  821. int ret;
  822. if (str == NULL)
  823. return(NULL);
  824. uri = xmlCreateURI();
  825. if (uri != NULL) {
  826. ret = xmlParse3986URIReference(uri, str);
  827. if (ret) {
  828. xmlFreeURI(uri);
  829. return(NULL);
  830. }
  831. }
  832. return(uri);
  833. }
  834. /**
  835. * xmlParseURIReference:
  836. * @uri: pointer to an URI structure
  837. * @str: the string to analyze
  838. *
  839. * Parse an URI reference string based on RFC 3986 and fills in the
  840. * appropriate fields of the @uri structure
  841. *
  842. * URI-reference = URI / relative-ref
  843. *
  844. * Returns 0 or the error code
  845. */
  846. int
  847. xmlParseURIReference(xmlURIPtr uri, const char *str) {
  848. return(xmlParse3986URIReference(uri, str));
  849. }
  850. /**
  851. * xmlParseURIRaw:
  852. * @str: the URI string to analyze
  853. * @raw: if 1 unescaping of URI pieces are disabled
  854. *
  855. * Parse an URI but allows to keep intact the original fragments.
  856. *
  857. * URI-reference = URI / relative-ref
  858. *
  859. * Returns a newly built xmlURIPtr or NULL in case of error
  860. */
  861. xmlURIPtr
  862. xmlParseURIRaw(const char *str, int raw) {
  863. xmlURIPtr uri;
  864. int ret;
  865. if (str == NULL)
  866. return(NULL);
  867. uri = xmlCreateURI();
  868. if (uri != NULL) {
  869. if (raw) {
  870. uri->cleanup |= 2;
  871. }
  872. ret = xmlParseURIReference(uri, str);
  873. if (ret) {
  874. xmlFreeURI(uri);
  875. return(NULL);
  876. }
  877. }
  878. return(uri);
  879. }
  880. /************************************************************************
  881. * *
  882. * Generic URI structure functions *
  883. * *
  884. ************************************************************************/
  885. /**
  886. * xmlCreateURI:
  887. *
  888. * Simply creates an empty xmlURI
  889. *
  890. * Returns the new structure or NULL in case of error
  891. */
  892. xmlURIPtr
  893. xmlCreateURI(void) {
  894. xmlURIPtr ret;
  895. ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
  896. if (ret == NULL) {
  897. xmlGenericError(xmlGenericErrorContext,
  898. "xmlCreateURI: out of memory\n");
  899. return(NULL);
  900. }
  901. memset(ret, 0, sizeof(xmlURI));
  902. return(ret);
  903. }
  904. /**
  905. * xmlSaveUri:
  906. * @uri: pointer to an xmlURI
  907. *
  908. * Save the URI as an escaped string
  909. *
  910. * Returns a new string (to be deallocated by caller)
  911. */
  912. xmlChar *
  913. xmlSaveUri(xmlURIPtr uri) {
  914. xmlChar *ret = NULL;
  915. xmlChar *temp;
  916. const char *p;
  917. int len;
  918. int max;
  919. if (uri == NULL) return(NULL);
  920. max = 80;
  921. ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
  922. if (ret == NULL) {
  923. xmlGenericError(xmlGenericErrorContext,
  924. "xmlSaveUri: out of memory\n");
  925. return(NULL);
  926. }
  927. len = 0;
  928. if (uri->scheme != NULL) {
  929. p = uri->scheme;
  930. while (*p != 0) {
  931. if (len >= max) {
  932. max *= 2;
  933. temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
  934. if (temp == NULL) {
  935. xmlGenericError(xmlGenericErrorContext,
  936. "xmlSaveUri: out of memory\n");
  937. xmlFree(ret);
  938. return(NULL);
  939. }
  940. ret = temp;
  941. }
  942. ret[len++] = *p++;
  943. }
  944. if (len >= max) {
  945. max *= 2;
  946. temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
  947. if (temp == NULL) {
  948. xmlGenericError(xmlGenericErrorContext,
  949. "xmlSaveUri: out of memory\n");
  950. xmlFree(ret);
  951. return(NULL);
  952. }
  953. ret = temp;
  954. }
  955. ret[len++] = ':';
  956. }
  957. if (uri->opaque != NULL) {
  958. p = uri->opaque;
  959. while (*p != 0) {
  960. if (len + 3 >= max) {
  961. max *= 2;
  962. temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
  963. if (temp == NULL) {
  964. xmlGenericError(xmlGenericErrorContext,
  965. "xmlSaveUri: out of memory\n");
  966. xmlFree(ret);
  967. return(NULL);
  968. }
  969. ret = temp;
  970. }
  971. if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
  972. ret[len++] = *p++;
  973. else {
  974. int val = *(unsigned char *)p++;
  975. int hi = val / 0x10, lo = val % 0x10;
  976. ret[len++] = '%';
  977. ret[len++] = hi + (hi > 9? 'A'-10 : '0');
  978. ret[len++] = lo + (lo > 9? 'A'-10 : '0');
  979. }
  980. }
  981. } else {
  982. if (uri->server != NULL) {
  983. if (len + 3 >= max) {
  984. max *= 2;
  985. temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
  986. if (temp == NULL) {
  987. xmlGenericError(xmlGenericErrorContext,
  988. "xmlSaveUri: out of memory\n");
  989. xmlFree(ret);
  990. return(NULL);
  991. }
  992. ret = temp;
  993. }
  994. ret[len++] = '/';
  995. ret[len++] = '/';
  996. if (uri->user != NULL) {
  997. p = uri->user;
  998. while (*p != 0) {
  999. if (len + 3 >= max) {
  1000. max *= 2;
  1001. temp = (xmlChar *) xmlRealloc(ret,
  1002. (max + 1) * sizeof(xmlChar));
  1003. if (temp == NULL) {
  1004. xmlGenericError(xmlGenericErrorContext,
  1005. "xmlSaveUri: out of memory\n");
  1006. xmlFree(ret);
  1007. return(NULL);
  1008. }
  1009. ret = temp;
  1010. }
  1011. if ((IS_UNRESERVED(*(p))) ||
  1012. ((*(p) == ';')) || ((*(p) == ':')) ||
  1013. ((*(p) == '&')) || ((*(p) == '=')) ||
  1014. ((*(p) == '+')) || ((*(p) == '$')) ||
  1015. ((*(p) == ',')))
  1016. ret[len++] = *p++;
  1017. else {
  1018. int val = *(unsigned char *)p++;
  1019. int hi = val / 0x10, lo = val % 0x10;
  1020. ret[len++] = '%';
  1021. ret[len++] = hi + (hi > 9? 'A'-10 : '0');
  1022. ret[len++] = lo + (lo > 9? 'A'-10 : '0');
  1023. }
  1024. }
  1025. if (len + 3 >= max) {
  1026. max *= 2;
  1027. temp = (xmlChar *) xmlRealloc(ret,
  1028. (max + 1) * sizeof(xmlChar));
  1029. if (temp == NULL) {
  1030. xmlGenericError(xmlGenericErrorContext,
  1031. "xmlSaveUri: out of memory\n");
  1032. xmlFree(ret);
  1033. return(NULL);
  1034. }
  1035. ret = temp;
  1036. }
  1037. ret[len++] = '@';
  1038. }
  1039. p = uri->server;
  1040. while (*p != 0) {
  1041. if (len >= max) {
  1042. max *= 2;
  1043. temp = (xmlChar *) xmlRealloc(ret,
  1044. (max + 1) * sizeof(xmlChar));
  1045. if (temp == NULL) {
  1046. xmlGenericError(xmlGenericErrorContext,
  1047. "xmlSaveUri: out of memory\n");
  1048. xmlFree(ret);
  1049. return(NULL);
  1050. }
  1051. ret = temp;
  1052. }
  1053. ret[len++] = *p++;
  1054. }
  1055. if (uri->port > 0) {
  1056. if (len + 10 >= max) {
  1057. max *= 2;
  1058. temp = (xmlChar *) xmlRealloc(ret,
  1059. (max + 1) * sizeof(xmlChar));
  1060. if (temp == NULL) {
  1061. xmlGenericError(xmlGenericErrorContext,
  1062. "xmlSaveUri: out of memory\n");
  1063. xmlFree(ret);
  1064. return(NULL);
  1065. }
  1066. ret = temp;
  1067. }
  1068. len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
  1069. }
  1070. } else if (uri->authority != NULL) {
  1071. if (len + 3 >= max) {
  1072. max *= 2;
  1073. temp = (xmlChar *) xmlRealloc(ret,
  1074. (max + 1) * sizeof(xmlChar));
  1075. if (temp == NULL) {
  1076. xmlGenericError(xmlGenericErrorContext,
  1077. "xmlSaveUri: out of memory\n");
  1078. xmlFree(ret);
  1079. return(NULL);
  1080. }
  1081. ret = temp;
  1082. }
  1083. ret[len++] = '/';
  1084. ret[len++] = '/';
  1085. p = uri->authority;
  1086. while (*p != 0) {
  1087. if (len + 3 >= max) {
  1088. max *= 2;
  1089. temp = (xmlChar *) xmlRealloc(ret,
  1090. (max + 1) * sizeof(xmlChar));
  1091. if (temp == NULL) {
  1092. xmlGenericError(xmlGenericErrorContext,
  1093. "xmlSaveUri: out of memory\n");
  1094. xmlFree(ret);
  1095. return(NULL);
  1096. }
  1097. ret = temp;
  1098. }
  1099. if ((IS_UNRESERVED(*(p))) ||
  1100. ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
  1101. ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
  1102. ((*(p) == '=')) || ((*(p) == '+')))
  1103. ret[len++] = *p++;
  1104. else {
  1105. int val = *(unsigned char *)p++;
  1106. int hi = val / 0x10, lo = val % 0x10;
  1107. ret[len++] = '%';
  1108. ret[len++] = hi + (hi > 9? 'A'-10 : '0');
  1109. ret[len++] = lo + (lo > 9? 'A'-10 : '0');
  1110. }
  1111. }
  1112. } else if (uri->scheme != NULL) {
  1113. if (len + 3 >= max) {
  1114. max *= 2;
  1115. temp = (xmlChar *) xmlRealloc(ret,
  1116. (max + 1) * sizeof(xmlChar));
  1117. if (temp == NULL) {
  1118. xmlGenericError(xmlGenericErrorContext,
  1119. "xmlSaveUri: out of memory\n");
  1120. xmlFree(ret);
  1121. return(NULL);
  1122. }
  1123. ret = temp;
  1124. }
  1125. ret[len++] = '/';
  1126. ret[len++] = '/';
  1127. }
  1128. if (uri->path != NULL) {
  1129. p = uri->path;
  1130. /*
  1131. * the colon in file:///d: should not be escaped or
  1132. * Windows accesses fail later.
  1133. */
  1134. if ((uri->scheme != NULL) &&
  1135. (p[0] == '/') &&
  1136. (((p[1] >= 'a') && (p[1] <= 'z')) ||
  1137. ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
  1138. (p[2] == ':') &&
  1139. (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
  1140. if (len + 3 >= max) {
  1141. max *= 2;
  1142. ret = (xmlChar *) xmlRealloc(ret,
  1143. (max + 1) * sizeof(xmlChar));
  1144. if (ret == NULL) {
  1145. xmlGenericError(xmlGenericErrorContext,
  1146. "xmlSaveUri: out of memory\n");
  1147. return(NULL);
  1148. }
  1149. }
  1150. ret[len++] = *p++;
  1151. ret[len++] = *p++;
  1152. ret[len++] = *p++;
  1153. }
  1154. while (*p != 0) {
  1155. if (len + 3 >= max) {
  1156. max *= 2;
  1157. temp = (xmlChar *) xmlRealloc(ret,
  1158. (max + 1) * sizeof(xmlChar));
  1159. if (temp == NULL) {
  1160. xmlGenericError(xmlGenericErrorContext,
  1161. "xmlSaveUri: out of memory\n");
  1162. xmlFree(ret);
  1163. return(NULL);
  1164. }
  1165. ret = temp;
  1166. }
  1167. if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
  1168. ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
  1169. ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
  1170. ((*(p) == ',')))
  1171. ret[len++] = *p++;
  1172. else {
  1173. int val = *(unsigned char *)p++;
  1174. int hi = val / 0x10, lo = val % 0x10;
  1175. ret[len++] = '%';
  1176. ret[len++] = hi + (hi > 9? 'A'-10 : '0');
  1177. ret[len++] = lo + (lo > 9? 'A'-10 : '0');
  1178. }
  1179. }
  1180. }
  1181. if (uri->query_raw != NULL) {
  1182. if (len + 1 >= max) {
  1183. max *= 2;
  1184. temp = (xmlChar *) xmlRealloc(ret,
  1185. (max + 1) * sizeof(xmlChar));
  1186. if (temp == NULL) {
  1187. xmlGenericError(xmlGenericErrorContext,
  1188. "xmlSaveUri: out of memory\n");
  1189. xmlFree(ret);
  1190. return(NULL);
  1191. }
  1192. ret = temp;
  1193. }
  1194. ret[len++] = '?';
  1195. p = uri->query_raw;
  1196. while (*p != 0) {
  1197. if (len + 1 >= max) {
  1198. max *= 2;
  1199. temp = (xmlChar *) xmlRealloc(ret,
  1200. (max + 1) * sizeof(xmlChar));
  1201. if (temp == NULL) {
  1202. xmlGenericError(xmlGenericErrorContext,
  1203. "xmlSaveUri: out of memory\n");
  1204. xmlFree(ret);
  1205. return(NULL);
  1206. }
  1207. ret = temp;
  1208. }
  1209. ret[len++] = *p++;
  1210. }
  1211. } else if (uri->query != NULL) {
  1212. if (len + 3 >= max) {
  1213. max *= 2;
  1214. temp = (xmlChar *) xmlRealloc(ret,
  1215. (max + 1) * sizeof(xmlChar));
  1216. if (temp == NULL) {
  1217. xmlGenericError(xmlGenericErrorContext,
  1218. "xmlSaveUri: out of memory\n");
  1219. xmlFree(ret);
  1220. return(NULL);
  1221. }
  1222. ret = temp;
  1223. }
  1224. ret[len++] = '?';
  1225. p = uri->query;
  1226. while (*p != 0) {
  1227. if (len + 3 >= max) {
  1228. max *= 2;
  1229. temp = (xmlChar *) xmlRealloc(ret,
  1230. (max + 1) * sizeof(xmlChar));
  1231. if (temp == NULL) {
  1232. xmlGenericError(xmlGenericErrorContext,
  1233. "xmlSaveUri: out of memory\n");
  1234. xmlFree(ret);
  1235. return(NULL);
  1236. }
  1237. ret = temp;
  1238. }
  1239. if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
  1240. ret[len++] = *p++;
  1241. else {
  1242. int val = *(unsigned char *)p++;
  1243. int hi = val / 0x10, lo = val % 0x10;
  1244. ret[len++] = '%';
  1245. ret[len++] = hi + (hi > 9? 'A'-10 : '0');
  1246. ret[len++] = lo + (lo > 9? 'A'-10 : '0');
  1247. }
  1248. }
  1249. }
  1250. }
  1251. if (uri->fragment != NULL) {
  1252. if (len + 3 >= max) {
  1253. max *= 2;
  1254. temp = (xmlChar *) xmlRealloc(ret,
  1255. (max + 1) * sizeof(xmlChar));
  1256. if (temp == NULL) {
  1257. xmlGenericError(xmlGenericErrorContext,
  1258. "xmlSaveUri: out of memory\n");
  1259. xmlFree(ret);
  1260. return(NULL);
  1261. }
  1262. ret = temp;
  1263. }
  1264. ret[len++] = '#';
  1265. p = uri->fragment;
  1266. while (*p != 0) {
  1267. if (len + 3 >= max) {
  1268. max *= 2;
  1269. temp = (xmlChar *) xmlRealloc(ret,
  1270. (max + 1) * sizeof(xmlChar));
  1271. if (temp == NULL) {
  1272. xmlGenericError(xmlGenericErrorContext,
  1273. "xmlSaveUri: out of memory\n");
  1274. xmlFree(ret);
  1275. return(NULL);
  1276. }
  1277. ret = temp;
  1278. }
  1279. if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
  1280. ret[len++] = *p++;
  1281. else {
  1282. int val = *(unsigned char *)p++;
  1283. int hi = val / 0x10, lo = val % 0x10;
  1284. ret[len++] = '%';
  1285. ret[len++] = hi + (hi > 9? 'A'-10 : '0');
  1286. ret[len++] = lo + (lo > 9? 'A'-10 : '0');
  1287. }
  1288. }
  1289. }
  1290. if (len >= max) {
  1291. max *= 2;
  1292. temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
  1293. if (temp == NULL) {
  1294. xmlGenericError(xmlGenericErrorContext,
  1295. "xmlSaveUri: out of memory\n");
  1296. xmlFree(ret);
  1297. return(NULL);
  1298. }
  1299. ret = temp;
  1300. }
  1301. ret[len] = 0;
  1302. return(ret);
  1303. }
  1304. /**
  1305. * xmlPrintURI:
  1306. * @stream: a FILE* for the output
  1307. * @uri: pointer to an xmlURI
  1308. *
  1309. * Prints the URI in the stream @stream.
  1310. */
  1311. void
  1312. xmlPrintURI(FILE *stream, xmlURIPtr uri) {
  1313. xmlChar *out;
  1314. out = xmlSaveUri(uri);
  1315. if (out != NULL) {
  1316. fprintf(stream, "%s", (char *) out);
  1317. xmlFree(out);
  1318. }
  1319. }
  1320. /**
  1321. * xmlCleanURI:
  1322. * @uri: pointer to an xmlURI
  1323. *
  1324. * Make sure the xmlURI struct is free of content
  1325. */
  1326. static void
  1327. xmlCleanURI(xmlURIPtr uri) {
  1328. if (uri == NULL) return;
  1329. if (uri->scheme != NULL) xmlFree(uri->scheme);
  1330. uri->scheme = NULL;
  1331. if (uri->server != NULL) xmlFree(uri->server);
  1332. uri->server = NULL;
  1333. if (uri->user != NULL) xmlFree(uri->user);
  1334. uri->user = NULL;
  1335. if (uri->path != NULL) xmlFree(uri->path);
  1336. uri->path = NULL;
  1337. if (uri->fragment != NULL) xmlFree(uri->fragment);
  1338. uri->fragment = NULL;
  1339. if (uri->opaque != NULL) xmlFree(uri->opaque);
  1340. uri->opaque = NULL;
  1341. if (uri->authority != NULL) xmlFree(uri->authority);
  1342. uri->authority = NULL;
  1343. if (uri->query != NULL) xmlFree(uri->query);
  1344. uri->query = NULL;
  1345. if (uri->query_raw != NULL) xmlFree(uri->query_raw);
  1346. uri->query_raw = NULL;
  1347. }
  1348. /**
  1349. * xmlFreeURI:
  1350. * @uri: pointer to an xmlURI
  1351. *
  1352. * Free up the xmlURI struct
  1353. */
  1354. void
  1355. xmlFreeURI(xmlURIPtr uri) {
  1356. if (uri == NULL) return;
  1357. if (uri->scheme != NULL) xmlFree(uri->scheme);
  1358. if (uri->server != NULL) xmlFree(uri->server);
  1359. if (uri->user != NULL) xmlFree(uri->user);
  1360. if (uri->path != NULL) xmlFree(uri->path);
  1361. if (uri->fragment != NULL) xmlFree(uri->fragment);
  1362. if (uri->opaque != NULL) xmlFree(uri->opaque);
  1363. if (uri->authority != NULL) xmlFree(uri->authority);
  1364. if (uri->query != NULL) xmlFree(uri->query);
  1365. if (uri->query_raw != NULL) xmlFree(uri->query_raw);
  1366. xmlFree(uri);
  1367. }
  1368. /************************************************************************
  1369. * *
  1370. * Helper functions *
  1371. * *
  1372. ************************************************************************/
  1373. /**
  1374. * xmlNormalizeURIPath:
  1375. * @path: pointer to the path string
  1376. *
  1377. * Applies the 5 normalization steps to a path string--that is, RFC 2396
  1378. * Section 5.2, steps 6.c through 6.g.
  1379. *
  1380. * Normalization occurs directly on the string, no new allocation is done
  1381. *
  1382. * Returns 0 or an error code
  1383. */
  1384. int
  1385. xmlNormalizeURIPath(char *path) {
  1386. char *cur, *out;
  1387. if (path == NULL)
  1388. return(-1);
  1389. /* Skip all initial "/" chars. We want to get to the beginning of the
  1390. * first non-empty segment.
  1391. */
  1392. cur = path;
  1393. while (cur[0] == '/')
  1394. ++cur;
  1395. if (cur[0] == '\0')
  1396. return(0);
  1397. /* Keep everything we've seen so far. */
  1398. out = cur;
  1399. /*
  1400. * Analyze each segment in sequence for cases (c) and (d).
  1401. */
  1402. while (cur[0] != '\0') {
  1403. /*
  1404. * c) All occurrences of "./", where "." is a complete path segment,
  1405. * are removed from the buffer string.
  1406. */
  1407. if ((cur[0] == '.') && (cur[1] == '/')) {
  1408. cur += 2;
  1409. /* '//' normalization should be done at this point too */
  1410. while (cur[0] == '/')
  1411. cur++;
  1412. continue;
  1413. }
  1414. /*
  1415. * d) If the buffer string ends with "." as a complete path segment,
  1416. * that "." is removed.
  1417. */
  1418. if ((cur[0] == '.') && (cur[1] == '\0'))
  1419. break;
  1420. /* Otherwise keep the segment. */
  1421. while (cur[0] != '/') {
  1422. if (cur[0] == '\0')
  1423. goto done_cd;
  1424. (out++)[0] = (cur++)[0];
  1425. }
  1426. /* nomalize // */
  1427. while ((cur[0] == '/') && (cur[1] == '/'))
  1428. cur++;
  1429. (out++)[0] = (cur++)[0];
  1430. }
  1431. done_cd:
  1432. out[0] = '\0';
  1433. /* Reset to the beginning of the first segment for the next sequence. */
  1434. cur = path;
  1435. while (cur[0] == '/')
  1436. ++cur;
  1437. if (cur[0] == '\0')
  1438. return(0);
  1439. /*
  1440. * Analyze each segment in sequence for cases (e) and (f).
  1441. *
  1442. * e) All occurrences of "<segment>/../", where <segment> is a
  1443. * complete path segment not equal to "..", are removed from the
  1444. * buffer string. Removal of these path segments is performed
  1445. * iteratively, removing the leftmost matching pattern on each
  1446. * iteration, until no matching pattern remains.
  1447. *
  1448. * f) If the buffer string ends with "<segment>/..", where <segment>
  1449. * is a complete path segment not equal to "..", that
  1450. * "<segment>/.." is removed.
  1451. *
  1452. * To satisfy the "iterative" clause in (e), we need to collapse the
  1453. * string every time we find something that needs to be removed. Thus,
  1454. * we don't need to keep two pointers into the string: we only need a
  1455. * "current position" pointer.
  1456. */
  1457. while (1) {
  1458. char *segp, *tmp;
  1459. /* At the beginning of each iteration of this loop, "cur" points to
  1460. * the first character of the segment we want to examine.
  1461. */
  1462. /* Find the end of the current segment. */
  1463. segp = cur;
  1464. while ((segp[0] != '/') && (segp[0] != '\0'))
  1465. ++segp;
  1466. /* If this is the last segment, we're done (we need at least two
  1467. * segments to meet the criteria for the (e) and (f) cases).
  1468. */
  1469. if (segp[0] == '\0')
  1470. break;
  1471. /* If the first segment is "..", or if the next segment _isn't_ "..",
  1472. * keep this segment and try the next one.
  1473. */
  1474. ++segp;
  1475. if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
  1476. || ((segp[0] != '.') || (segp[1] != '.')
  1477. || ((segp[2] != '/') && (segp[2] != '\0')))) {
  1478. cur = segp;
  1479. continue;
  1480. }
  1481. /* If we get here, remove this segment and the next one and back up
  1482. * to the previous segment (if there is one), to implement the
  1483. * "iteratively" clause. It's pretty much impossible to back up
  1484. * while maintaining two pointers into the buffer, so just compact
  1485. * the whole buffer now.
  1486. */
  1487. /* If this is the end of the buffer, we're done. */
  1488. if (segp[2] == '\0') {
  1489. cur[0] = '\0';
  1490. break;
  1491. }
  1492. /* Valgrind complained, strcpy(cur, segp + 3); */
  1493. /* string will overlap, do not use strcpy */
  1494. tmp = cur;
  1495. segp += 3;
  1496. while ((*tmp++ = *segp++) != 0);
  1497. /* If there are no previous segments, then keep going from here. */
  1498. segp = cur;
  1499. while ((segp > path) && ((--segp)[0] == '/'))
  1500. ;
  1501. if (segp == path)
  1502. continue;
  1503. /* "segp" is pointing to the end of a previous segment; find it's
  1504. * start. We need to back up to the previous segment and start
  1505. * over with that to handle things like "foo/bar/../..". If we
  1506. * don't do this, then on the first pass we'll remove the "bar/..",
  1507. * but be pointing at the second ".." so we won't realize we can also
  1508. * remove the "foo/..".
  1509. */
  1510. cur = segp;
  1511. while ((cur > path) && (cur[-1] != '/'))
  1512. --cur;
  1513. }
  1514. out[0] = '\0';
  1515. /*
  1516. * g) If the resulting buffer string still begins with one or more
  1517. * complete path segments of "..", then the reference is
  1518. * considered to be in error. Implementations may handle this
  1519. * error by retaining these components in the resolved path (i.e.,
  1520. * treating them as part of the final URI), by removing them from
  1521. * the resolved path (i.e., discarding relative levels above the
  1522. * root), or by avoiding traversal of the reference.
  1523. *
  1524. * We discard them from the final path.
  1525. */
  1526. if (path[0] == '/') {
  1527. cur = path;
  1528. while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
  1529. && ((cur[3] == '/') || (cur[3] == '\0')))
  1530. cur += 3;
  1531. if (cur != path) {
  1532. out = path;
  1533. while (cur[0] != '\0')
  1534. (out++)[0] = (cur++)[0];
  1535. out[0] = 0;
  1536. }
  1537. }
  1538. return(0);
  1539. }
  1540. static int is_hex(char c) {
  1541. if (((c >= '0') && (c <= '9')) ||
  1542. ((c >= 'a') && (c <= 'f')) ||
  1543. ((c >= 'A') && (c <= 'F')))
  1544. return(1);
  1545. return(0);
  1546. }
  1547. /**
  1548. * xmlURIUnescapeString:
  1549. * @str: the string to unescape
  1550. * @len: the length in bytes to unescape (or <= 0 to indicate full string)
  1551. * @target: optional destination buffer
  1552. *
  1553. * Unescaping routine, but does not check that the string is an URI. The
  1554. * output is a direct unsigned char translation of %XX values (no encoding)
  1555. * Note that the length of the result can only be smaller or same size as
  1556. * the input string.
  1557. *
  1558. * Returns a copy of the string, but unescaped, will return NULL only in case
  1559. * of error
  1560. */
  1561. char *
  1562. xmlURIUnescapeString(const char *str, int len, char *target) {
  1563. char *ret, *out;
  1564. const char *in;
  1565. if (str == NULL)
  1566. return(NULL);
  1567. if (len <= 0) len = strlen(str);
  1568. if (len < 0) return(NULL);
  1569. if (target == NULL) {
  1570. ret = (char *) xmlMallocAtomic(len + 1);
  1571. if (ret == NULL) {
  1572. xmlGenericError(xmlGenericErrorContext,
  1573. "xmlURIUnescapeString: out of memory\n");
  1574. return(NULL);
  1575. }
  1576. } else
  1577. ret = target;
  1578. in = str;
  1579. out = ret;
  1580. while(len > 0) {
  1581. if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
  1582. in++;
  1583. if ((*in >= '0') && (*in <= '9'))
  1584. *out = (*in - '0');
  1585. else if ((*in >= 'a') && (*in <= 'f'))
  1586. *out = (*in - 'a') + 10;
  1587. else if ((*in >= 'A') && (*in <= 'F'))
  1588. *out = (*in - 'A') + 10;
  1589. in++;
  1590. if ((*in >= '0') && (*in <= '9'))
  1591. *out = *out * 16 + (*in - '0');
  1592. else if ((*in >= 'a') && (*in <= 'f'))
  1593. *out = *out * 16 + (*in - 'a') + 10;
  1594. else if ((*in >= 'A') && (*in <= 'F'))
  1595. *out = *out * 16 + (*in - 'A') + 10;
  1596. in++;
  1597. len -= 3;
  1598. out++;
  1599. } else {
  1600. *out++ = *in++;
  1601. len--;
  1602. }
  1603. }
  1604. *out = 0;
  1605. return(ret);
  1606. }
  1607. /**
  1608. * xmlURIEscapeStr:
  1609. * @str: string to escape
  1610. * @list: exception list string of chars not to escape
  1611. *
  1612. * This routine escapes a string to hex, ignoring reserved characters (a-z)
  1613. * and the characters in the exception list.
  1614. *
  1615. * Returns a new escaped string or NULL in case of error.
  1616. */
  1617. xmlChar *
  1618. xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
  1619. xmlChar *ret, ch;
  1620. xmlChar *temp;
  1621. const xmlChar *in;
  1622. unsigned int len, out;
  1623. if (str == NULL)
  1624. return(NULL);
  1625. if (str[0] == 0)
  1626. return(xmlStrdup(str));
  1627. len = xmlStrlen(str);
  1628. if (!(len > 0)) return(NULL);
  1629. len += 20;
  1630. ret = (xmlChar *) xmlMallocAtomic(len);
  1631. if (ret == NULL) {
  1632. xmlGenericError(xmlGenericErrorContext,
  1633. "xmlURIEscapeStr: out of memory\n");
  1634. return(NULL);
  1635. }
  1636. in = (const xmlChar *) str;
  1637. out = 0;
  1638. while(*in != 0) {
  1639. if (len - out <= 3) {
  1640. len += 20;
  1641. temp = (xmlChar *) xmlRealloc(ret, len);
  1642. if (temp == NULL) {
  1643. xmlGenericError(xmlGenericErrorContext,
  1644. "xmlURIEscapeStr: out of memory\n");
  1645. xmlFree(ret);
  1646. return(NULL);
  1647. }
  1648. ret = temp;
  1649. }
  1650. ch = *in;
  1651. if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
  1652. unsigned char val;
  1653. ret[out++] = '%';
  1654. val = ch >> 4;
  1655. if (val <= 9)
  1656. ret[out++] = '0' + val;
  1657. else
  1658. ret[out++] = 'A' + val - 0xA;
  1659. val = ch & 0xF;
  1660. if (val <= 9)
  1661. ret[out++] = '0' + val;
  1662. else
  1663. ret[out++] = 'A' + val - 0xA;
  1664. in++;
  1665. } else {
  1666. ret[out++] = *in++;
  1667. }
  1668. }
  1669. ret[out] = 0;
  1670. return(ret);
  1671. }
  1672. /**
  1673. * xmlURIEscape:
  1674. * @str: the string of the URI to escape
  1675. *
  1676. * Escaping routine, does not do validity checks !
  1677. * It will try to escape the chars needing this, but this is heuristic
  1678. * based it's impossible to be sure.
  1679. *
  1680. * Returns an copy of the string, but escaped
  1681. *
  1682. * 25 May 2001
  1683. * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
  1684. * according to RFC2396.
  1685. * - Carl Douglas
  1686. */
  1687. xmlChar *
  1688. xmlURIEscape(const xmlChar * str)
  1689. {
  1690. xmlChar *ret, *segment = NULL;
  1691. xmlURIPtr uri;
  1692. int ret2;
  1693. #define NULLCHK(p) if(!p) { \
  1694. xmlGenericError(xmlGenericErrorContext, \
  1695. "xmlURIEscape: out of memory\n"); \
  1696. xmlFreeURI(uri); \
  1697. return NULL; } \
  1698. if (str == NULL)
  1699. return (NULL);
  1700. uri = xmlCreateURI();
  1701. if (uri != NULL) {
  1702. /*
  1703. * Allow escaping errors in the unescaped form
  1704. */
  1705. uri->cleanup = 1;
  1706. ret2 = xmlParseURIReference(uri, (const char *)str);
  1707. if (ret2) {
  1708. xmlFreeURI(uri);
  1709. return (NULL);
  1710. }
  1711. }
  1712. if (!uri)
  1713. return NULL;
  1714. ret = NULL;
  1715. if (uri->scheme) {
  1716. segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
  1717. NULLCHK(segment)
  1718. ret = xmlStrcat(ret, segment);
  1719. ret = xmlStrcat(ret, BAD_CAST ":");
  1720. xmlFree(segment);
  1721. }
  1722. if (uri->authority) {
  1723. segment =
  1724. xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
  1725. NULLCHK(segment)
  1726. ret = xmlStrcat(ret, BAD_CAST "//");
  1727. ret = xmlStrcat(ret, segment);
  1728. xmlFree(segment);
  1729. }
  1730. if (uri->user) {
  1731. segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
  1732. NULLCHK(segment)
  1733. ret = xmlStrcat(ret,BAD_CAST "//");
  1734. ret = xmlStrcat(ret, segment);
  1735. ret = xmlStrcat(ret, BAD_CAST "@");
  1736. xmlFree(segment);
  1737. }
  1738. if (uri->server) {
  1739. segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
  1740. NULLCHK(segment)
  1741. if (uri->user == NULL)
  1742. ret = xmlStrcat(ret, BAD_CAST "//");
  1743. ret = xmlStrcat(ret, segment);
  1744. xmlFree(segment);
  1745. }
  1746. if (uri->port) {
  1747. xmlChar port[10];
  1748. snprintf((char *) port, 10, "%d", uri->port);
  1749. ret = xmlStrcat(ret, BAD_CAST ":");
  1750. ret = xmlStrcat(ret, port);
  1751. }
  1752. if (uri->path) {
  1753. segment =
  1754. xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
  1755. NULLCHK(segment)
  1756. ret = xmlStrcat(ret, segment);
  1757. xmlFree(segment);
  1758. }
  1759. if (uri->query_raw) {
  1760. ret = xmlStrcat(ret, BAD_CAST "?");
  1761. ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
  1762. }
  1763. else if (uri->query) {
  1764. segment =
  1765. xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
  1766. NULLCHK(segment)
  1767. ret = xmlStrcat(ret, BAD_CAST "?");
  1768. ret = xmlStrcat(ret, segment);
  1769. xmlFree(segment);
  1770. }
  1771. if (uri->opaque) {
  1772. segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
  1773. NULLCHK(segment)
  1774. ret = xmlStrcat(ret, segment);
  1775. xmlFree(segment);
  1776. }
  1777. if (uri->fragment) {
  1778. segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
  1779. NULLCHK(segment)
  1780. ret = xmlStrcat(ret, BAD_CAST "#");
  1781. ret = xmlStrcat(ret, segment);
  1782. xmlFree(segment);
  1783. }
  1784. xmlFreeURI(uri);
  1785. #undef NULLCHK
  1786. return (ret);
  1787. }
  1788. /************************************************************************
  1789. * *
  1790. * Public functions *
  1791. * *
  1792. ************************************************************************/
  1793. /**
  1794. * xmlBuildURI:
  1795. * @URI: the URI instance found in the document
  1796. * @base: the base value
  1797. *
  1798. * Computes he final URI of the reference done by checking that
  1799. * the given URI is valid, and building the final URI using the
  1800. * base URI. This is processed according to section 5.2 of the
  1801. * RFC 2396
  1802. *
  1803. * 5.2. Resolving Relative References to Absolute Form
  1804. *
  1805. * Returns a new URI string (to be freed by the caller) or NULL in case
  1806. * of error.
  1807. */
  1808. xmlChar *
  1809. xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
  1810. xmlChar *val = NULL;
  1811. int ret, len, indx, cur, out;
  1812. xmlURIPtr ref = NULL;
  1813. xmlURIPtr bas = NULL;
  1814. xmlURIPtr res = NULL;
  1815. /*
  1816. * 1) The URI reference is parsed into the potential four components and
  1817. * fragment identifier, as described in Section 4.3.
  1818. *
  1819. * NOTE that a completely empty URI is treated by modern browsers
  1820. * as a reference to "." rather than as a synonym for the current
  1821. * URI. Should we do that here?
  1822. */
  1823. if (URI == NULL)
  1824. ret = -1;
  1825. else {
  1826. if (*URI) {
  1827. ref = xmlCreateURI();
  1828. if (ref == NULL)
  1829. goto done;
  1830. ret = xmlParseURIReference(ref, (const char *) URI);
  1831. }
  1832. else
  1833. ret = 0;
  1834. }
  1835. if (ret != 0)
  1836. goto done;
  1837. if ((ref != NULL) && (ref->scheme != NULL)) {
  1838. /*
  1839. * The URI is absolute don't modify.
  1840. */
  1841. val = xmlStrdup(URI);
  1842. goto done;
  1843. }
  1844. if (base == NULL)
  1845. ret = -1;
  1846. else {
  1847. bas = xmlCreateURI();
  1848. if (bas == NULL)
  1849. goto done;
  1850. ret = xmlParseURIReference(bas, (const char *) base);
  1851. }
  1852. if (ret != 0) {
  1853. if (ref)
  1854. val = xmlSaveUri(ref);
  1855. goto done;
  1856. }
  1857. if (ref == NULL) {
  1858. /*
  1859. * the base fragment must be ignored
  1860. */
  1861. if (bas->fragment != NULL) {
  1862. xmlFree(bas->fragment);
  1863. bas->fragment = NULL;
  1864. }
  1865. val = xmlSaveUri(bas);
  1866. goto done;
  1867. }
  1868. /*
  1869. * 2) If the path component is empty and the scheme, authority, and
  1870. * query components are undefined, then it is a reference to the
  1871. * current document and we are done. Otherwise, the reference URI's
  1872. * query and fragment components are defined as found (or not found)
  1873. * within the URI reference and not inherited from the base URI.
  1874. *
  1875. * NOTE that in modern browsers, the parsing differs from the above
  1876. * in the following aspect: the query component is allowed to be
  1877. * defined while still treating this as a reference to the current
  1878. * document.
  1879. */
  1880. res = xmlCreateURI();
  1881. if (res == NULL)
  1882. goto done;
  1883. if ((ref->scheme == NULL) && (ref->path == NULL) &&
  1884. ((ref->authority == NULL) && (ref->server == NULL))) {
  1885. if (bas->scheme != NULL)
  1886. res->scheme = xmlMemStrdup(bas->scheme);
  1887. if (bas->authority != NULL)
  1888. res->authority = xmlMemStrdup(bas->authority);
  1889. else if (bas->server != NULL) {
  1890. res->server = xmlMemStrdup(bas->server);
  1891. if (bas->user != NULL)
  1892. res->user = xmlMemStrdup(bas->user);
  1893. res->port = bas->port;
  1894. }
  1895. if (bas->path != NULL)
  1896. res->path = xmlMemStrdup(bas->path);
  1897. if (ref->query_raw != NULL)
  1898. res->query_raw = xmlMemStrdup (ref->query_raw);
  1899. else if (ref->query != NULL)
  1900. res->query = xmlMemStrdup(ref->query);
  1901. else if (bas->query_raw != NULL)
  1902. res->query_raw = xmlMemStrdup(bas->query_raw);
  1903. else if (bas->query != NULL)
  1904. res->query = xmlMemStrdup(bas->query);
  1905. if (ref->fragment != NULL)
  1906. res->fragment = xmlMemStrdup(ref->fragment);
  1907. goto step_7;
  1908. }
  1909. /*
  1910. * 3) If the scheme component is defined, indicating that the reference
  1911. * starts with a scheme name, then the reference is interpreted as an
  1912. * absolute URI and we are done. Otherwise, the reference URI's
  1913. * scheme is inherited from the base URI's scheme component.
  1914. */
  1915. if (ref->scheme != NULL) {
  1916. val = xmlSaveUri(ref);
  1917. goto done;
  1918. }
  1919. if (bas->scheme != NULL)
  1920. res->scheme = xmlMemStrdup(bas->scheme);
  1921. if (ref->query_raw != NULL)
  1922. res->query_raw = xmlMemStrdup(ref->query_raw);
  1923. else if (ref->query != NULL)
  1924. res->query = xmlMemStrdup(ref->query);
  1925. if (ref->fragment != NULL)
  1926. res->fragment = xmlMemStrdup(ref->fragment);
  1927. /*
  1928. * 4) If the authority component is defined, then the reference is a
  1929. * network-path and we skip to step 7. Otherwise, the reference
  1930. * URI's authority is inherited from the base URI's authority
  1931. * component, which will also be undefined if the URI scheme does not
  1932. * use an authority component.
  1933. */
  1934. if ((ref->authority != NULL) || (ref->server != NULL)) {
  1935. if (ref->authority != NULL)
  1936. res->authority = xmlMemStrdup(ref->authority);
  1937. else {
  1938. res->server = xmlMemStrdup(ref->server);
  1939. if (ref->user != NULL)
  1940. res->user = xmlMemStrdup(ref->user);
  1941. res->port = ref->port;
  1942. }
  1943. if (ref->path != NULL)
  1944. res->path = xmlMemStrdup(ref->path);
  1945. goto step_7;
  1946. }
  1947. if (bas->authority != NULL)
  1948. res->authority = xmlMemStrdup(bas->authority);
  1949. else if (bas->server != NULL) {
  1950. res->server = xmlMemStrdup(bas->server);
  1951. if (bas->user != NULL)
  1952. res->user = xmlMemStrdup(bas->user);
  1953. res->port = bas->port;
  1954. }
  1955. /*
  1956. * 5) If the path component begins with a slash character ("/"), then
  1957. * the reference is an absolute-path and we skip to step 7.
  1958. */
  1959. if ((ref->path != NULL) && (ref->path[0] == '/')) {
  1960. res->path = xmlMemStrdup(ref->path);
  1961. goto step_7;
  1962. }
  1963. /*
  1964. * 6) If this step is reached, then we are resolving a relative-path
  1965. * reference. The relative path needs to be merged with the base
  1966. * URI's path. Although there are many ways to do this, we will
  1967. * describe a simple method using a separate string buffer.
  1968. *
  1969. * Allocate a buffer large enough for the result string.
  1970. */
  1971. len = 2; /* extra / and 0 */
  1972. if (ref->path != NULL)
  1973. len += strlen(ref->path);
  1974. if (bas->path != NULL)
  1975. len += strlen(bas->path);
  1976. res->path = (char *) xmlMallocAtomic(len);
  1977. if (res->path == NULL) {
  1978. xmlGenericError(xmlGenericErrorContext,
  1979. "xmlBuildURI: out of memory\n");
  1980. goto done;
  1981. }
  1982. res->path[0] = 0;
  1983. /*
  1984. * a) All but the last segment of the base URI's path component is
  1985. * copied to the buffer. In other words, any characters after the
  1986. * last (right-most) slash character, if any, are excluded.
  1987. */
  1988. cur = 0;
  1989. out = 0;
  1990. if (bas->path != NULL) {
  1991. while (bas->path[cur] != 0) {
  1992. while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
  1993. cur++;
  1994. if (bas->path[cur] == 0)
  1995. break;
  1996. cur++;
  1997. while (out < cur) {
  1998. res->path[out] = bas->path[out];
  1999. out++;
  2000. }
  2001. }
  2002. }
  2003. res->path[out] = 0;
  2004. /*
  2005. * b) The reference's path component is appended to the buffer
  2006. * string.
  2007. */
  2008. if (ref->path != NULL && ref->path[0] != 0) {
  2009. indx = 0;
  2010. /*
  2011. * Ensure the path includes a '/'
  2012. */
  2013. if ((out == 0) && (bas->server != NULL))
  2014. res->path[out++] = '/';
  2015. while (ref->path[indx] != 0) {
  2016. res->path[out++] = ref->path[indx++];
  2017. }
  2018. }
  2019. res->path[out] = 0;
  2020. /*
  2021. * Steps c) to h) are really path normalization steps
  2022. */
  2023. xmlNormalizeURIPath(res->path);
  2024. step_7:
  2025. /*
  2026. * 7) The resulting URI components, including any inherited from the
  2027. * base URI, are recombined to give the absolute form of the URI
  2028. * reference.
  2029. */
  2030. val = xmlSaveUri(res);
  2031. done:
  2032. if (ref != NULL)
  2033. xmlFreeURI(ref);
  2034. if (bas != NULL)
  2035. xmlFreeURI(bas);
  2036. if (res != NULL)
  2037. xmlFreeURI(res);
  2038. return(val);
  2039. }
  2040. /**
  2041. * xmlBuildRelativeURI:
  2042. * @URI: the URI reference under consideration
  2043. * @base: the base value
  2044. *
  2045. * Expresses the URI of the reference in terms relative to the
  2046. * base. Some examples of this operation include:
  2047. * base = "http://site1.com/docs/book1.html"
  2048. * URI input URI returned
  2049. * docs/pic1.gif pic1.gif
  2050. * docs/img/pic1.gif img/pic1.gif
  2051. * img/pic1.gif ../img/pic1.gif
  2052. * http://site1.com/docs/pic1.gif pic1.gif
  2053. * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
  2054. *
  2055. * base = "docs/book1.html"
  2056. * URI input URI returned
  2057. * docs/pic1.gif pic1.gif
  2058. * docs/img/pic1.gif img/pic1.gif
  2059. * img/pic1.gif ../img/pic1.gif
  2060. * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
  2061. *
  2062. *
  2063. * Note: if the URI reference is really wierd or complicated, it may be
  2064. * worthwhile to first convert it into a "nice" one by calling
  2065. * xmlBuildURI (using 'base') before calling this routine,
  2066. * since this routine (for reasonable efficiency) assumes URI has
  2067. * already been through some validation.
  2068. *
  2069. * Returns a new URI string (to be freed by the caller) or NULL in case
  2070. * error.
  2071. */
  2072. xmlChar *
  2073. xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
  2074. {
  2075. xmlChar *val = NULL;
  2076. int ret;
  2077. int ix;
  2078. int pos = 0;
  2079. int nbslash = 0;
  2080. int len;
  2081. xmlURIPtr ref = NULL;
  2082. xmlURIPtr bas = NULL;
  2083. xmlChar *bptr, *uptr, *vptr;
  2084. int remove_path = 0;
  2085. if ((URI == NULL) || (*URI == 0))
  2086. return NULL;
  2087. /*
  2088. * First parse URI into a standard form
  2089. */
  2090. ref = xmlCreateURI ();
  2091. if (ref == NULL)
  2092. return NULL;
  2093. /* If URI not already in "relative" form */
  2094. if (URI[0] != '.') {
  2095. ret = xmlParseURIReference (ref, (const char *) URI);
  2096. if (ret != 0)
  2097. goto done; /* Error in URI, return NULL */
  2098. } else
  2099. ref->path = (char *)xmlStrdup(URI);
  2100. /*
  2101. * Next parse base into the same standard form
  2102. */
  2103. if ((base == NULL) || (*base == 0)) {
  2104. val = xmlStrdup (URI);
  2105. goto done;
  2106. }
  2107. bas = xmlCreateURI ();
  2108. if (bas == NULL)
  2109. goto done;
  2110. if (base[0] != '.') {
  2111. ret = xmlParseURIReference (bas, (const char *) base);
  2112. if (ret != 0)
  2113. goto done; /* Error in base, return NULL */
  2114. } else
  2115. bas->path = (char *)xmlStrdup(base);
  2116. /*
  2117. * If the scheme / server on the URI differs from the base,
  2118. * just return the URI
  2119. */
  2120. if ((ref->scheme != NULL) &&
  2121. ((bas->scheme == NULL) ||
  2122. (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
  2123. (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
  2124. val = xmlStrdup (URI);
  2125. goto done;
  2126. }
  2127. if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
  2128. val = xmlStrdup(BAD_CAST "");
  2129. goto done;
  2130. }
  2131. if (bas->path == NULL) {
  2132. val = xmlStrdup((xmlChar *)ref->path);
  2133. goto done;
  2134. }
  2135. if (ref->path == NULL) {
  2136. ref->path = (char *) "/";
  2137. remove_path = 1;
  2138. }
  2139. /*
  2140. * At this point (at last!) we can compare the two paths
  2141. *
  2142. * First we take care of the special case where either of the
  2143. * two path components may be missing (bug 316224)
  2144. */
  2145. if (bas->path == NULL) {
  2146. if (ref->path != NULL) {
  2147. uptr = (xmlChar *) ref->path;
  2148. if (*uptr == '/')
  2149. uptr++;
  2150. /* exception characters from xmlSaveUri */
  2151. val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
  2152. }
  2153. goto done;
  2154. }
  2155. bptr = (xmlChar *)bas->path;
  2156. if (ref->path == NULL) {
  2157. for (ix = 0; bptr[ix] != 0; ix++) {
  2158. if (bptr[ix] == '/')
  2159. nbslash++;
  2160. }
  2161. uptr = NULL;
  2162. len = 1; /* this is for a string terminator only */
  2163. } else {
  2164. /*
  2165. * Next we compare the two strings and find where they first differ
  2166. */
  2167. if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
  2168. pos += 2;
  2169. if ((*bptr == '.') && (bptr[1] == '/'))
  2170. bptr += 2;
  2171. else if ((*bptr == '/') && (ref->path[pos] != '/'))
  2172. bptr++;
  2173. while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
  2174. pos++;
  2175. if (bptr[pos] == ref->path[pos]) {
  2176. val = xmlStrdup(BAD_CAST "");
  2177. goto done; /* (I can't imagine why anyone would do this) */
  2178. }
  2179. /*
  2180. * In URI, "back up" to the last '/' encountered. This will be the
  2181. * beginning of the "unique" suffix of URI
  2182. */
  2183. ix = pos;
  2184. if ((ref->path[ix] == '/') && (ix > 0))
  2185. ix--;
  2186. else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
  2187. ix -= 2;
  2188. for (; ix > 0; ix--) {
  2189. if (ref->path[ix] == '/')
  2190. break;
  2191. }
  2192. if (ix == 0) {
  2193. uptr = (xmlChar *)ref->path;
  2194. } else {
  2195. ix++;
  2196. uptr = (xmlChar *)&ref->path[ix];
  2197. }
  2198. /*
  2199. * In base, count the number of '/' from the differing point
  2200. */
  2201. if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
  2202. for (; bptr[ix] != 0; ix++) {
  2203. if (bptr[ix] == '/')
  2204. nbslash++;
  2205. }
  2206. }
  2207. len = xmlStrlen (uptr) + 1;
  2208. }
  2209. if (nbslash == 0) {
  2210. if (uptr != NULL)
  2211. /* exception characters from xmlSaveUri */
  2212. val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
  2213. goto done;
  2214. }
  2215. /*
  2216. * Allocate just enough space for the returned string -
  2217. * length of the remainder of the URI, plus enough space
  2218. * for the "../" groups, plus one for the terminator
  2219. */
  2220. val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
  2221. if (val == NULL) {
  2222. xmlGenericError(xmlGenericErrorContext,
  2223. "xmlBuildRelativeURI: out of memory\n");
  2224. goto done;
  2225. }
  2226. vptr = val;
  2227. /*
  2228. * Put in as many "../" as needed
  2229. */
  2230. for (; nbslash>0; nbslash--) {
  2231. *vptr++ = '.';
  2232. *vptr++ = '.';
  2233. *vptr++ = '/';
  2234. }
  2235. /*
  2236. * Finish up with the end of the URI
  2237. */
  2238. if (uptr != NULL) {
  2239. if ((vptr > val) && (len > 0) &&
  2240. (uptr[0] == '/') && (vptr[-1] == '/')) {
  2241. memcpy (vptr, uptr + 1, len - 1);
  2242. vptr[len - 2] = 0;
  2243. } else {
  2244. memcpy (vptr, uptr, len);
  2245. vptr[len - 1] = 0;
  2246. }
  2247. } else {
  2248. vptr[len - 1] = 0;
  2249. }
  2250. /* escape the freshly-built path */
  2251. vptr = val;
  2252. /* exception characters from xmlSaveUri */
  2253. val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
  2254. xmlFree(vptr);
  2255. done:
  2256. /*
  2257. * Free the working variables
  2258. */
  2259. if (remove_path != 0)
  2260. ref->path = NULL;
  2261. if (ref != NULL)
  2262. xmlFreeURI (ref);
  2263. if (bas != NULL)
  2264. xmlFreeURI (bas);
  2265. return val;
  2266. }
  2267. /**
  2268. * xmlCanonicPath:
  2269. * @path: the resource locator in a filesystem notation
  2270. *
  2271. * Constructs a canonic path from the specified path.
  2272. *
  2273. * Returns a new canonic path, or a duplicate of the path parameter if the
  2274. * construction fails. The caller is responsible for freeing the memory occupied
  2275. * by the returned string. If there is insufficient memory available, or the
  2276. * argument is NULL, the function returns NULL.
  2277. */
  2278. #define IS_WINDOWS_PATH(p) \
  2279. ((p != NULL) && \
  2280. (((p[0] >= 'a') && (p[0] <= 'z')) || \
  2281. ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
  2282. (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
  2283. xmlChar *
  2284. xmlCanonicPath(const xmlChar *path)
  2285. {
  2286. /*
  2287. * For Windows implementations, additional work needs to be done to
  2288. * replace backslashes in pathnames with "forward slashes"
  2289. */
  2290. #if defined(_WIN32) && !defined(__CYGWIN__)
  2291. int len = 0;
  2292. int i = 0;
  2293. xmlChar *p = NULL;
  2294. #endif
  2295. xmlURIPtr uri;
  2296. xmlChar *ret;
  2297. const xmlChar *absuri;
  2298. if (path == NULL)
  2299. return(NULL);
  2300. /* sanitize filename starting with // so it can be used as URI */
  2301. if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
  2302. path++;
  2303. if ((uri = xmlParseURI((const char *) path)) != NULL) {
  2304. xmlFreeURI(uri);
  2305. return xmlStrdup(path);
  2306. }
  2307. /* Check if this is an "absolute uri" */
  2308. absuri = xmlStrstr(path, BAD_CAST "://");
  2309. if (absuri != NULL) {
  2310. int l, j;
  2311. unsigned char c;
  2312. xmlChar *escURI;
  2313. /*
  2314. * this looks like an URI where some parts have not been
  2315. * escaped leading to a parsing problem. Check that the first
  2316. * part matches a protocol.
  2317. */
  2318. l = absuri - path;
  2319. /* Bypass if first part (part before the '://') is > 20 chars */
  2320. if ((l <= 0) || (l > 20))
  2321. goto path_processing;
  2322. /* Bypass if any non-alpha characters are present in first part */
  2323. for (j = 0;j < l;j++) {
  2324. c = path[j];
  2325. if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
  2326. goto path_processing;
  2327. }
  2328. /* Escape all except the characters specified in the supplied path */
  2329. escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
  2330. if (escURI != NULL) {
  2331. /* Try parsing the escaped path */
  2332. uri = xmlParseURI((const char *) escURI);
  2333. /* If successful, return the escaped string */
  2334. if (uri != NULL) {
  2335. xmlFreeURI(uri);
  2336. return escURI;
  2337. }
  2338. }
  2339. }
  2340. path_processing:
  2341. /* For Windows implementations, replace backslashes with 'forward slashes' */
  2342. #if defined(_WIN32) && !defined(__CYGWIN__)
  2343. /*
  2344. * Create a URI structure
  2345. */
  2346. uri = xmlCreateURI();
  2347. if (uri == NULL) { /* Guard against 'out of memory' */
  2348. return(NULL);
  2349. }
  2350. len = xmlStrlen(path);
  2351. if ((len > 2) && IS_WINDOWS_PATH(path)) {
  2352. /* make the scheme 'file' */
  2353. uri->scheme = xmlStrdup(BAD_CAST "file");
  2354. /* allocate space for leading '/' + path + string terminator */
  2355. uri->path = xmlMallocAtomic(len + 2);
  2356. if (uri->path == NULL) {
  2357. xmlFreeURI(uri); /* Guard agains 'out of memory' */
  2358. return(NULL);
  2359. }
  2360. /* Put in leading '/' plus path */
  2361. uri->path[0] = '/';
  2362. p = uri->path + 1;
  2363. strncpy(p, path, len + 1);
  2364. } else {
  2365. uri->path = xmlStrdup(path);
  2366. if (uri->path == NULL) {
  2367. xmlFreeURI(uri);
  2368. return(NULL);
  2369. }
  2370. p = uri->path;
  2371. }
  2372. /* Now change all occurences of '\' to '/' */
  2373. while (*p != '\0') {
  2374. if (*p == '\\')
  2375. *p = '/';
  2376. p++;
  2377. }
  2378. if (uri->scheme == NULL) {
  2379. ret = xmlStrdup((const xmlChar *) uri->path);
  2380. } else {
  2381. ret = xmlSaveUri(uri);
  2382. }
  2383. xmlFreeURI(uri);
  2384. #else
  2385. ret = xmlStrdup((const xmlChar *) path);
  2386. #endif
  2387. return(ret);
  2388. }
  2389. /**
  2390. * xmlPathToURI:
  2391. * @path: the resource locator in a filesystem notation
  2392. *
  2393. * Constructs an URI expressing the existing path
  2394. *
  2395. * Returns a new URI, or a duplicate of the path parameter if the
  2396. * construction fails. The caller is responsible for freeing the memory
  2397. * occupied by the returned string. If there is insufficient memory available,
  2398. * or the argument is NULL, the function returns NULL.
  2399. */
  2400. xmlChar *
  2401. xmlPathToURI(const xmlChar *path)
  2402. {
  2403. xmlURIPtr uri;
  2404. xmlURI temp;
  2405. xmlChar *ret, *cal;
  2406. if (path == NULL)
  2407. return(NULL);
  2408. if ((uri = xmlParseURI((const char *) path)) != NULL) {
  2409. xmlFreeURI(uri);
  2410. return xmlStrdup(path);
  2411. }
  2412. cal = xmlCanonicPath(path);
  2413. if (cal == NULL)
  2414. return(NULL);
  2415. #if defined(_WIN32) && !defined(__CYGWIN__)
  2416. /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
  2417. If 'cal' is a valid URI allready then we are done here, as continuing would make
  2418. it invalid. */
  2419. if ((uri = xmlParseURI((const char *) cal)) != NULL) {
  2420. xmlFreeURI(uri);
  2421. return cal;
  2422. }
  2423. /* 'cal' can contain a relative path with backslashes. If that is processed
  2424. by xmlSaveURI, they will be escaped and the external entity loader machinery
  2425. will fail. So convert them to slashes. Misuse 'ret' for walking. */
  2426. ret = cal;
  2427. while (*ret != '\0') {
  2428. if (*ret == '\\')
  2429. *ret = '/';
  2430. ret++;
  2431. }
  2432. #endif
  2433. memset(&temp, 0, sizeof(temp));
  2434. temp.path = (char *) cal;
  2435. ret = xmlSaveUri(&temp);
  2436. xmlFree(cal);
  2437. return(ret);
  2438. }
  2439. #define bottom_uri
  2440. #include "elfgcchack.h"