org_lexer.l 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516
  1. /* Org Mode file lexer -*-c-*- */
  2. %{
  3. /*
  4. * This program is free software: you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License as
  6. * published by the Free Software Foundation, either version 3 of
  7. * the License, or (at your option) any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful, but
  10. * WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. * General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program. If not, see
  16. * <http://www.gnu.org/licenses/>.
  17. */
  18. /* Org Mode lexer */
  19. #include <stdio.h>
  20. #include <stdlib.h>
  21. #include <assert.h>
  22. #include "doc_elt_util.h"
  23. #include "org_parser.h"
  24. #include "doc_elt.h"
  25. #include "org_heading.h"
  26. #include "org_text.h"
  27. #include "org_property.h"
  28. int yywrap (yyscan_t scanner);
  29. void yyerror (char const *);
  30. void headline_aoeaoe (org_heading *heading, char *string, size_t len, yyscan_t scanner);
  31. %}
  32. /* Flex Options */
  33. /* Generate a reentrant parser */
  34. %option reentrant
  35. %option header-file="org_lexer.h"
  36. %option extra-type="struct extra *"
  37. /* Rule Start Conditions */
  38. %x heading text property property_skipwhitespace property_value property_finish
  39. %s property_drawer
  40. %% /* Start of rules */
  41. /*
  42. * Heading
  43. * "** Heading blah blah "
  44. */
  /* Start of a heading: one or more '*', one space, then the rest of
   * the line (the trailing newline, when present, is part of the match).
   *
   * A new heading element becomes the element under construction.  If
   * another element was under construction, it is handed to the parser
   * through yyextra->elt and its token type is returned; otherwise the
   * action falls through and scanning continues.
   */
^[*]+" ".*[\n]?  {
    debug_msg (LEXER, 4, "heading start\n");

    /* remember what was being built before the heading replaces it */
    TOKEN prev_type = yyextra->curr_type;
    yyextra->elt = yyextra->curr_elt;

    /* create the new element */
    yyextra->curr_elt = (doc_elt *) org_heading_create_empty (&org_heading_ops);
    yyextra->curr_type = T_ORG_HEADING;
    org_heading *heading = (org_heading *) yyextra->curr_elt;
    org_heading_initversion (heading, yyextra->src);

    /* Copy the matched line.  The copy is length-tagged, not
     * NUL-terminated, so it is a raw byte copy. */
    char *line_copy = malloc (yyleng);
    if (line_copy == NULL)
      {
        /* nothing sensible can be returned to the parser: fail loudly
         * instead of writing through a NULL pointer */
        YY_FATAL_ERROR ("out of dynamic memory while copying a heading line");
      }
    memcpy (line_copy, yytext, yyleng);
    org_heading_set_entire_text (heading, line_copy, yyleng,
                                 yyextra->src, yyextra->ctxt);

    BEGIN (INITIAL);

    /* return the previous element, if there was one */
    if (yyextra->elt != NULL && prev_type != T_NOTHING)
      {
        debug_msg (LEXER, 3, "element return\n");
        return prev_type;
      }
}
  68. /*
  69. * Text
  70. *
  71. * Any paragraph of text, also a catch all for anything without
  72. * specific rules
  73. */
  /* First character of a line that no more specific rule claimed:
   * treat the line as plain text.  The character is kept with yymore()
   * so that the <text> rule sees the whole line in yytext. */
^.  {
    debug_msg (LEXER, 5, "text start line\n");
    yymore ();
    BEGIN (text);
}
  /* Finish a text line: append the whole line (yytext also holds the
   * first character kept by yymore) to the text element under
   * construction, creating that element first when the current element
   * is not text.  When a different element was under construction it is
   * handed over in yyextra->elt and its token type is returned. */
<text>.*"\n"?  {
    debug_msg (LEXER, 5, "text finish line\n");
    TOKEN prev_type = T_NOTHING;

    if (yyextra->curr_type != T_ORG_TEXT)
      {
        /* stash the element being replaced */
        prev_type = yyextra->curr_type;
        yyextra->elt = yyextra->curr_elt;
        debug_msg (LEXER, 5, "text create new\n");

        /* create a new text element */
        yyextra->curr_type = T_ORG_TEXT;
        yyextra->curr_elt = (doc_elt *) org_text_create_empty (&org_text_ops);
        org_text_initversion ((org_text *) yyextra->curr_elt, yyextra->src);
      }

    /* build "old text + this line" in a fresh buffer */
    org_text *text_elt = (org_text *) yyextra->curr_elt;
    size_t old_len = org_text_get_length (text_elt, yyextra->src);
    char *old_text = org_text_get_text (text_elt, yyextra->src);
    size_t joined_len = old_len + yyleng;

    char *joined = malloc (joined_len);
    if (joined == NULL)
      {
        YY_FATAL_ERROR ("out of dynamic memory while appending a text line");
      }
    /* a freshly created element may have no buffer yet; never hand a
     * possibly-NULL source to memcpy */
    if (old_len > 0)
      {
        memcpy (joined, old_text, old_len);
      }
    memcpy (joined + old_len, yytext, yyleng);

    /* NOTE(review): the previous buffer is not released here; whether
     * org_text_set_text takes care of it is not visible from this
     * file -- confirm. */
    org_text_set_text (text_elt, joined, joined_len, yyextra->src);

    BEGIN (INITIAL);

    /* return the previous element, if there was one */
    if (prev_type != T_NOTHING && yyextra->elt != NULL)
      {
        debug_msg (LEXER, 3, "element return\n");
        return prev_type;
      }
    /* otherwise keep scanning: more text may follow for this element */
}
  /* An empty line is part of the paragraph: append the newline to the
   * text element under construction, creating that element first when
   * the current element is not text.  When a different element was
   * under construction it is handed over in yyextra->elt and its token
   * type is returned. */
^"\n"  {
    debug_msg (LEXER, 5, "text add newline\n");
    TOKEN prev_type = T_NOTHING;

    if (yyextra->curr_type != T_ORG_TEXT)
      {
        /* stash the element being replaced */
        prev_type = yyextra->curr_type;
        yyextra->elt = yyextra->curr_elt;
        debug_msg (LEXER, 5, "text create new\n");

        /* create a new text element */
        yyextra->curr_type = T_ORG_TEXT;
        yyextra->curr_elt = (doc_elt *) org_text_create_empty (&org_text_ops);
        org_text_initversion ((org_text *) yyextra->curr_elt, yyextra->src);
      }

    /* build "old text + newline" in a fresh buffer */
    org_text *text_elt = (org_text *) yyextra->curr_elt;
    size_t old_len = org_text_get_length (text_elt, yyextra->src);
    char *old_text = org_text_get_text (text_elt, yyextra->src);
    size_t joined_len = old_len + yyleng;

    char *joined = malloc (joined_len);
    if (joined == NULL)
      {
        YY_FATAL_ERROR ("out of dynamic memory while appending a blank line");
      }
    /* a freshly created element may have no buffer yet; never hand a
     * possibly-NULL source to memcpy */
    if (old_len > 0)
      {
        memcpy (joined, old_text, old_len);
      }
    memcpy (joined + old_len, yytext, yyleng);

    /* NOTE(review): the previous buffer is not released here; whether
     * org_text_set_text takes care of it is not visible from this
     * file -- confirm. */
    org_text_set_text (text_elt, joined, joined_len, yyextra->src);

    BEGIN (INITIAL);

    /* return the previous element, if there was one */
    if (prev_type != T_NOTHING && yyextra->elt != NULL)
      {
        debug_msg (LEXER, 3, "element return\n");
        return prev_type;
      }
    /* otherwise keep scanning: more text may follow for this element */
}
  /* Properties
   * "   :ID:    1201324054621536421035   "
   *
   * Org mode properties are (key, value) pairs.  They split whatever
   * element is currently being defined.  A property is recognized as a
   * line that has only spaces between its start and a colon-surrounded
   * key.  Spaces are allowed as part of the key.
   *
   * This rule is only active inside a property drawer.
   */
  /* Start of a property line, up to and including the ':' that closes
   * the key.  The match is kept with yymore() so the following rules
   * see the whole line.
   * NOTE(review): [^:]* also matches newlines, so a key could span
   * lines -- confirm that this is intended. */
<property_drawer>^" "*":"[^:]*":"  {
    debug_msg (LEXER, 3, "property start\n");

    /* stash the element that the property interrupts */
    TOKEN prev_type = yyextra->curr_type;
    yyextra->elt = yyextra->curr_elt;

    /* the property becomes the element under construction */
    yyextra->curr_type = T_ORG_PROPERTY;
    yyextra->curr_elt = (doc_elt *) org_property_create_empty (&org_property_ops);
    org_property *prop = (org_property *) yyextra->curr_elt;
    org_property_initversion (prop, yyextra->src);

    /* yyleng - 1 is the index of the ':' that closes the key */
    org_property_set_key_length (prop, yyextra->src, yyleng - 1);

    yymore ();
    BEGIN (property_skipwhitespace);

    /* return the previous element, if there was one */
    if (prev_type != T_NOTHING && yyextra->elt != NULL)
      {
        debug_msg (LEXER, 3, "property element return\n");
        return prev_type;
      }
}
  /* Skip the blanks between the key and the value.  Because of the
   * earlier yymore(), yyleng counts from the start of the line, so it
   * is the offset at which the value begins. */
<property_skipwhitespace>[ \t]*  {
    org_property *prop = (org_property *) yyextra->curr_elt;

    /* the value-length slot temporarily holds the value's start offset */
    org_property_set_value_length (prop, yyextra->src, yyleng);

    BEGIN (property_finish);
    yymore ();
}
  /* Finish the property line: store the full line text, then derive
   * the key and value substrings from the offsets recorded by the two
   * preceding rules, and return the finished property. */
<property_finish>.*[\n]?  {
    /* At this point yytext holds the whole line and the property's
     * length slots hold offsets into it:
     *
     *   "   :key:    value text   "
     *          ^     ^            ^
     *          |     |            \-> yyleng
     *          |     \-> value offset (kept in the value-length slot)
     *          \-> key length slot: index of the ':' closing the key
     */
    org_property *prop = (org_property *) yyextra->curr_elt;
    size_t text_len = (size_t) yyleng;
    size_t key_end = org_property_get_key_length (prop, yyextra->src);
    size_t value_start = org_property_get_value_length (prop, yyextra->src);

    /* both offsets were taken from earlier, shorter values of yyleng */
    assert (key_end <= text_len);
    assert (value_start <= text_len);

    /* Copy the line.  The copy is length-tagged, not NUL-terminated. */
    char *new_text = malloc (text_len);
    if (new_text == NULL)
      {
        YY_FATAL_ERROR ("out of dynamic memory while copying a property line");
      }
    memcpy (new_text, yytext, text_len);

    org_property_set_text (prop, yyextra->src, new_text, yyleng);
    org_property_set_value_string (prop, yyextra->src, new_text + value_start);

    /* Search backward for the end of the value: drop the trailing
     * newline and blanks.  The scan never goes below the start of the
     * value, so the unsigned length cannot underflow when the value is
     * empty or consists of blanks only. */
    size_t value_end = text_len;
    while (value_end > value_start
           && (new_text[value_end - 1] == ' '
               || new_text[value_end - 1] == '\t'
               || new_text[value_end - 1] == '\n'))
      {
        value_end--;
      }
    org_property_set_value_length (prop, yyextra->src, value_end - value_start);

    /* Search backward from the closing ':' for the opening ':'; the
     * key substring excludes both markup colons. */
    size_t key_start = key_end;
    while (key_start > 0 && new_text[key_start - 1] != ':')
      {
        key_start--;
      }
    org_property_set_key_string (prop, yyextra->src, new_text + key_start);
    org_property_set_key_length (prop, yyextra->src, key_end - key_start);

    if (LEXER_PRINTLEVEL == 5)
      {
        /* dump the raw key and value bytes to stderr */
        fwrite (org_property_get_key_string (prop, yyextra->src),
                sizeof (char),
                org_property_get_key_length (prop, yyextra->src),
                stderr);
        fwrite (org_property_get_value_string (prop, yyextra->src),
                sizeof (char),
                org_property_get_value_length (prop, yyextra->src),
                stderr);
      }

    /* return the property */
    debug_msg (LEXER, 3, "Property Return\n");
    BEGIN (property_drawer);

    /* hand the property over; nothing is under construction any more */
    yyextra->curr_type = T_NOTHING;
    yyextra->elt = yyextra->curr_elt;
    yyextra->curr_elt = NULL;
    return T_ORG_PROPERTY;
}
  /* Drawer Parser
   * ":PROPERTIES:"
   * ":END:"
   *
   * A fully recursive element.  They can start on any line.  Since the
   * syntax is recursive, separate tokens are needed for both the start
   * and end of a drawer.
   *
   * Current drawer support is just to create a property whose key is
   * the drawer keyword and whose value is empty.
   */
  /* Begin a property drawer */
^[ ]*":PROPERTIES:"[ ]*[\n]?  {
    debug_msg (LEXER, 3, "PROPERTIES drawer start\n");

    /* stash the element that the drawer interrupts */
    TOKEN prev_type = yyextra->curr_type;
    yyextra->elt = yyextra->curr_elt;

    /* create the new element */
    yyextra->curr_elt = (doc_elt *) org_property_create_empty (&org_property_ops);
    yyextra->curr_type = T_ORG_PROPERTY;
    org_property *prop = (org_property *) yyextra->curr_elt;
    org_property_initversion (prop, yyextra->src);

    /* Copy the line.  The copy is length-tagged, not NUL-terminated. */
    char *new_text = malloc (yyleng);
    if (new_text == NULL)
      {
        YY_FATAL_ERROR ("out of dynamic memory while copying a drawer line");
      }
    memcpy (new_text, yytext, yyleng);
    org_property_set_text (prop, yyextra->src, new_text, yyleng);

    /* Search backward for the ':' that closes the keyword.  The pattern
     * guarantees that one exists. */
    int pos = yyleng - 1;
    while (pos >= 0 && new_text[pos] != ':')
      {
        pos--;
      }

    /* the key is the keyword just before that ':', markup excluded */
    const int key_len = (int) (sizeof ("PROPERTIES") - 1);
    org_property_set_key (prop, yyextra->src, new_text + pos - key_len, key_len);

    /* a drawer line carries no value */
    org_property_set_value (prop, yyextra->src, NULL, 0);

    /* mark that we are currently in a property drawer */
    BEGIN (property_drawer);

    /* return the previous element, if there was one */
    if (yyextra->elt != NULL && prev_type != T_NOTHING)
      {
        debug_msg (LEXER, 3, "drawer element return\n");
        return prev_type;
      }
}
  /* Finish a drawer: ":END:" on a line of its own, only recognized
   * while inside a property drawer.  Creates a property whose key is
   * "END" and whose value is empty. */
<property_drawer>^[ ]*":END:"[ ]*"\n"?  {
    debug_msg (LEXER, 3, "END Drawer\n");

    /* stash the element that the drawer end interrupts */
    TOKEN prev_type = yyextra->curr_type;
    yyextra->elt = yyextra->curr_elt;

    /* create the new element */
    yyextra->curr_elt = (doc_elt *) org_property_create_empty (&org_property_ops);
    yyextra->curr_type = T_ORG_PROPERTY;
    org_property *prop = (org_property *) yyextra->curr_elt;
    org_property_initversion (prop, yyextra->src);

    /* Copy the line.  The copy is length-tagged, not NUL-terminated. */
    char *new_text = malloc (yyleng);
    if (new_text == NULL)
      {
        YY_FATAL_ERROR ("out of dynamic memory while copying a drawer line");
      }
    memcpy (new_text, yytext, yyleng);
    org_property_set_text (prop, yyextra->src, new_text, yyleng);

    /* Search backward for the ':' that closes the keyword.  The pattern
     * guarantees that one exists. */
    int pos = yyleng - 1;
    while (pos >= 0 && new_text[pos] != ':')
      {
        pos--;
      }

    /* the key is the keyword just before that ':', markup excluded */
    const int key_len = (int) (sizeof ("END") - 1);
    org_property_set_key (prop, yyextra->src, new_text + pos - key_len, key_len);

    /* a drawer line carries no value */
    org_property_set_value (prop, yyextra->src, NULL, 0);

    /* the drawer is closed: leave the property_drawer start condition */
    BEGIN (INITIAL);

    /* return the previous element, if there was one */
    if (yyextra->elt != NULL && prev_type != T_NOTHING)
      {
        debug_msg (LEXER, 3, "drawer element return\n");
        return prev_type;
      }
}
  /*
   * End Of File Wrap up
   *
   * Close the lexer and wrap up the last element.
   */
<<EOF>>  {
    debug_msg (LEXER, 5, "EOF\n");

    if (yyextra->curr_elt == NULL)
      {
        /* nothing left to hand over: stop the scanner */
        debug_msg (LEXER, 4, "EOF terminate lexer\n");
        yyterminate ();
      }

    /* An element is still being defined: hand it over and return its
     * type.  curr_elt is cleared, so the next end-of-file match takes
     * the branch above. */
    yyextra->elt = yyextra->curr_elt;
    yyextra->curr_elt = NULL;
    debug_msg (LEXER, 5, "EOF return element\n");
    return yyextra->curr_type;
}
  355. %%
  356. int
  357. yywrap (yyscan_t scanner)
  358. {
  359. /* Tell lex to stop processing at the end of a file */
  360. return 1;
  361. }
  362. bool
  363. is_todo_state (substr *s)
  364. {
  365. return false;
  366. }
  367. /**
  368. * @brief Parse a titile line, setting all the propper substrings
  369. */
  370. static void
  371. parse_title_line (org_heading * h, char * line, size_t len)
  372. {
  373. int i = 0;
  374. size_t last_len = 0;
  375. substr next_substr;
  376. /* set all the substrings to len 0 */
  377. /* skip all starting blank space*/
  378. while ( i < len)
  379. {
  380. if (line[i] != ' ')
  381. break;
  382. }
  383. /* get the next word */
  384. next_substr.string = (line + i);
  385. last_len = i;
  386. while ( i < len)
  387. {
  388. if (line[i] == ' ')
  389. break;
  390. }
  391. next_substr.length = i - last_len;
  392. /* check what the last word was */
  393. if (is_todo_state (&next_substr))
  394. {
  395. // implement this
  396. //h->todo = s;
  397. /* get the next word */
  398. next_substr.string = (line + i);
  399. last_len = i;
  400. while ( i < len)
  401. {
  402. if (line[i] == ' ')
  403. break;
  404. }
  405. next_substr.length = i - last_len;
  406. }
  407. /* assume that the next substr is either text or :tags: */
  408. bool exit = false;
  409. while ( i < len)
  410. {
  411. if (next_substr.string[0] == ':')
  412. {
  413. /* grab all characters untill there is no tag */
  414. }
  415. else
  416. {
  417. /* grab all the characters, assuming its heading text */
  418. }
  419. /* get the next word */
  420. next_substr.string = (line + len);
  421. last_len = len;
  422. while ( i < len)
  423. {
  424. if (line[i] != ' ')
  425. break;
  426. }
  427. next_substr.length = len - last_len;
  428. }
  429. }