124 lines
3.5 KiB
Diff
124 lines
3.5 KiB
Diff
|
Description: Improve the XLS parsing:
|
||
|
* Don't stop processing after an EOF which is not followed by a BOF, as there
|
||
|
are many records that can appear after it (like a graph).
|
||
|
* On unexpected BOF record, complain and dump already extracted data before
|
||
|
freeing memory.
|
||
|
* Accept different versions of BOF and XF records.
|
||
|
* Add more #defines for record types.
|
||
|
|
||
|
--- src/xlsparse.c
|
||
|
+++ src/xlsparse.c
|
||
|
@@ -107,12 +107,13 @@
|
||
|
itemsread = catdoc_read(rec, 1, reclen, input);
|
||
|
rec[reclen] = '\0';
|
||
|
}
|
||
|
+ /*
|
||
|
+ fprintf(stderr,"Rectype 0x%04X reclen=%d\n",rectype, reclen);
|
||
|
if(eof_flag) {
|
||
|
- if (rectype != BOF) {
|
||
|
+ if (rectype != BOF8) {
|
||
|
break;
|
||
|
}
|
||
|
- }
|
||
|
-/* fprintf(stderr,"Rectype 0x%04X reclen=%d\n",rectype, reclen); */
|
||
|
+ }*/
|
||
|
process_item(rectype,reclen,rec);
|
||
|
if (rectype == MSEOF) {
|
||
|
eof_flag=1;
|
||
|
@@ -150,7 +151,7 @@
|
||
|
case WRITEPROT:
|
||
|
/* File is write protected, but we only read it */
|
||
|
break;
|
||
|
- case 0x42: {
|
||
|
+ case CODEPAGE: {
|
||
|
if (source_charset) break;
|
||
|
codepage=getshort(rec,0);
|
||
|
/*fprintf(stderr,"CODEPAGE %d\n",codepage); */
|
||
|
@@ -274,9 +275,10 @@
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
- case 0x03:
|
||
|
- case 0x103:
|
||
|
- case 0x303:
|
||
|
+ /* These 3 don't seem to make any sense. */
|
||
|
+ case INVALID_03:
|
||
|
+ case SXFORMULA:
|
||
|
+ case INVALID_303:
|
||
|
case NUMBER: {
|
||
|
int row,col;
|
||
|
unsigned char **pcell;
|
||
|
@@ -363,22 +365,31 @@
|
||
|
*saved_reference=copy_unicode_string(&src);
|
||
|
break;
|
||
|
}
|
||
|
- case BOF: {
|
||
|
+ case BOF2:
|
||
|
+ case BOF3:
|
||
|
+ case BOF4:
|
||
|
+ case BOF8: {
|
||
|
if (rowptr) {
|
||
|
fprintf(stderr,"BOF when current sheet is not flushed\n");
|
||
|
+ print_sheet();
|
||
|
free_sheet();
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
- case XF:
|
||
|
- case 0x43: /*from perl module Spreadsheet::ParseExecel */
|
||
|
+ case XF_4P:
|
||
|
+ case XF_4:
|
||
|
+ case XF: /* from perl module Spreadsheet::ParseExcel */
|
||
|
{
|
||
|
- short int formatIndex = getshort(rec,2);
|
||
|
+ short int formatIndex;
|
||
|
+ if (biff_version == 4)
|
||
|
+ formatIndex = (short int)rec[1];
|
||
|
+ else
|
||
|
+ formatIndex = getshort(rec, 2);
|
||
|
/* we are interested only in format index here */
|
||
|
if (formatTableIndex >= formatTableSize) {
|
||
|
formatTable=realloc(formatTable,
|
||
|
- (formatTableSize+=16)*sizeof(short int));
|
||
|
-
|
||
|
+ (formatTableSize+=16)*sizeof(short int));
|
||
|
+
|
||
|
if (!formatTable) {
|
||
|
fprintf(stderr,"Out of memory for format table");
|
||
|
exit (1);
|
||
|
--- src/xltypes.h
|
||
|
+++ src/xltypes.h
|
||
|
@@ -20,7 +20,7 @@
|
||
|
#define AUTOFILTERINFO 0x9D
|
||
|
#define BACKUP 0x40
|
||
|
#define BLANK 0x201
|
||
|
-#define BOF 0x809
|
||
|
+#define BOF8 0x809
|
||
|
#define BOOKBOOL 0xDA
|
||
|
#define BOOLERR 0x205
|
||
|
#define BOTTOMMARGIN 0x29
|
||
|
@@ -149,11 +149,21 @@
|
||
|
#define WRITEPROT 0x86
|
||
|
#define WSBOOL 0x81
|
||
|
#define XCT 0x59
|
||
|
-#define XF 0xE0
|
||
|
+#define XF_4P 0xE0
|
||
|
#define SST 0xFC
|
||
|
#define CONSTANT_STRING 0xFD
|
||
|
#define REFRESHALL 0x1B7
|
||
|
#define USESELFS 0x160
|
||
|
#define EXTSST 0xFF
|
||
|
/* Vitus additions */
|
||
|
-#define INTEGER_CELL 0x202
|
||
|
+#define INTEGER_CELL 0x202
|
||
|
+/* Tincho additions */
|
||
|
+#define BOF2 0x09
|
||
|
+#define BOF3 0x209
|
||
|
+#define BOF4 0x409
|
||
|
+#define INVALID_03 0x03
|
||
|
+#define INVALID_303 0x303
|
||
|
+#define MSODRAWING 0xEC
|
||
|
+#define SXFORMULA 0x103
|
||
|
+#define XF 0x43
|
||
|
+#define XF_4 0x443
|