-- Schema of the Generated Database


-- Interdependent identifiers appearing in the workspace: one row per
-- identifier key, with flags describing what the identifier is and how it is used
CREATE TABLE IDS(
  EID BIGINT, -- Key that uniquely identifies the identifier
  NAME CHARACTER VARYING, -- The identifier's name
  READONLY BOOLEAN, -- Set when it occurs in one or more read-only files
  UNDEFMACRO BOOLEAN, -- Set when it is apparently an undefined macro
  MACRO BOOLEAN, -- Set when it is a preprocessor macro
  FUNMACRO BOOLEAN, -- Set when it is a function-like preprocessor macro
  MACROARG BOOLEAN, -- Set when it is an argument of a preprocessor macro
  CPPCONST BOOLEAN, -- Set when it is used in a preprocessor constant
  CPPSTRVAL BOOLEAN, -- Set when the macro's value is used as a preprocessor string operand
  DEFCCONSTVAL BOOLEAN, -- Set when the macro's value is defined as a C compile-time constant
  NOTDEFCCONSTVAL BOOLEAN, -- Set when the macro's value is defined as not a C compile-time constant
  EXPCCONSTVAL BOOLEAN, -- Set when the macro's value is expanded as a C compile-time constant
  NOTEXPCCONSTVAL BOOLEAN, -- Set when the macro's value is expanded as not a C compile-time constant
  ORDINARY BOOLEAN, -- Set for an ordinary identifier (variable or function)
  SUETAG BOOLEAN, -- Set for a structure, union, or enumeration tag
  SUMEMBER BOOLEAN, -- Set for a member of a structure or union
  LABEL BOOLEAN, -- Set for a label
  TYPEDEF BOOLEAN, -- Set for a typedef
  ENUM BOOLEAN, -- Set for an enumeration member
  YACC BOOLEAN, -- Set for a yacc identifier
  FUN BOOLEAN, -- Set for a function name
  CSCOPE BOOLEAN, -- Set when its scope is a compilation unit
  LSCOPE BOOLEAN, -- Set when it has linkage scope
  UNUSED BOOLEAN, -- Set when it is not used
  PRIMARY KEY(EID) -- Each identifier key appears at most once
);


-- Files known to the workspace, one row per file key
CREATE TABLE FILES(
  FID INTEGER, -- Key that uniquely identifies the file
  NAME CHARACTER VARYING, -- Name of the file
  RO BOOLEAN, -- Set when the file is read-only
  PRIMARY KEY(FID) -- Each file key appears at most once
);


-- File metrics: at most one row per file for each PRECPP value, i.e. one set
-- of measurements taken before the C preprocessor (cpp) and one taken after it
CREATE TABLE FILEMETRICS(
  FID INTEGER, -- File key
  PRECPP BOOLEAN, -- True for values before the cpp, false for values after it
  NCHAR INTEGER, -- Number of characters
  NCCOMMENT INTEGER, -- Number of comment characters
  NSPACE INTEGER, -- Number of space characters
  NLCOMMENT INTEGER, -- Number of line comments
  NBCOMMENT INTEGER, -- Number of block comments
  NLINE INTEGER, -- Number of lines
  MAXLINELEN INTEGER, -- Maximum number of characters in a line
  MAXSTMTLEN INTEGER, -- Maximum number of tokens in a statement
  MAXSTMTNEST INTEGER, -- Maximum level of statement nesting
  MAXBRACENEST INTEGER, -- Maximum level of brace nesting
  MAXBRACKNEST INTEGER, -- Maximum level of bracket nesting
  BRACENEST INTEGER, -- Dangling brace nesting
  BRACKNEST INTEGER, -- Dangling bracket nesting
  NULINE INTEGER, -- Number of unprocessed lines
  NPPDIRECTIVE INTEGER, -- Number of C preprocessor directives
  NPPCOND INTEGER, -- Number of processed C preprocessor conditionals (ifdef, if, elif)
  NPPFMACRO INTEGER, -- Number of defined C preprocessor function-like macros
  NPPOMACRO INTEGER, -- Number of defined C preprocessor object-like macros
  NTOKEN INTEGER, -- Number of tokens
  NSTMT INTEGER, -- Number of statements or declarations
  NOP INTEGER, -- Number of operators
  NUOP INTEGER, -- Number of unique operators
  NNCONST INTEGER, -- Number of numeric constants
  NCLIT INTEGER, -- Number of character literals
  NSTRING INTEGER, -- Number of character strings
  NPPCONCATOP INTEGER, -- Number of token concatenation operators (##)
  NPPSTRINGOP INTEGER, -- Number of token stringification operators (#)
  NIF INTEGER, -- Number of if statements
  NELSE INTEGER, -- Number of else clauses
  NSWITCH INTEGER, -- Number of switch statements
  NCASE INTEGER, -- Number of case labels
  NDEFAULT INTEGER, -- Number of default labels
  NBREAK INTEGER, -- Number of break statements
  NFOR INTEGER, -- Number of for statements
  NWHILE INTEGER, -- Number of while statements
  NDO INTEGER, -- Number of do statements
  NCONTINUE INTEGER, -- Number of continue statements
  NGOTO INTEGER, -- Number of goto statements
  NRETURN INTEGER, -- Number of return statements
  NASM INTEGER, -- Number of assembly statements
  NTYPEOF INTEGER, -- Number of typeof operators
  NPID INTEGER, -- Number of project-scope identifiers
  NFID INTEGER, -- Number of file-scope (static) identifiers
  NMID INTEGER, -- Number of macro identifiers
  NID INTEGER, -- Total number of object and object-like identifiers
  NUPID INTEGER, -- Number of unique project-scope identifiers
  NUFID INTEGER, -- Number of unique file-scope (static) identifiers
  NUMID INTEGER, -- Number of unique macro identifiers
  NUID INTEGER, -- Number of unique object and object-like identifiers
  NLABEL INTEGER, -- Number of goto labels
  NMACROEXPANDTOKEN INTEGER, -- Tokens added by macro expansion
  NCOPIES INTEGER, -- Number of copies of the file
  NINCFILE INTEGER, -- Number of directly included files
  NPFUNCTION INTEGER, -- Number of defined project-scope functions
  NFFUNCTION INTEGER, -- Number of defined file-scope (static) functions
  NPVAR INTEGER, -- Number of defined project-scope variables
  NFVAR INTEGER, -- Number of defined file-scope (static) variables
  NAGGREGATE INTEGER, -- Number of complete aggregate (struct/union) declarations
  NAMEMBER INTEGER, -- Number of declared aggregate (struct/union) members
  NENUM INTEGER, -- Number of complete enumeration declarations
  NEMEMBER INTEGER, -- Number of declared enumeration elements
  PRIMARY KEY(FID, PRECPP), -- Composite key: file plus before/after-cpp flag
  FOREIGN KEY(FID) REFERENCES FILES(FID) -- The measured file must exist in FILES
);


-- Instances of identifier tokens within the source code: each row locates one
-- token by file and offset and links it to its identifier in IDS
CREATE TABLE TOKENS(
  FID INTEGER, -- File key
  FOFFSET INTEGER, -- Offset within the file
  EID BIGINT, -- Identifier key
  PRIMARY KEY(FID, FOFFSET), -- At most one token row per file position
  FOREIGN KEY(FID) REFERENCES FILES(FID), -- The containing file must exist in FILES
  FOREIGN KEY(EID) REFERENCES IDS(EID) -- The identifier must exist in IDS
);


-- Comments in the code: each row holds the text of one comment, located by
-- file and offset
CREATE TABLE COMMENTS(
  FID INTEGER, -- File key
  FOFFSET INTEGER, -- Offset within the file
  COMMENT CHARACTER VARYING, -- The comment, including its delimiters
  PRIMARY KEY(FID, FOFFSET), -- At most one comment row per file position
  FOREIGN KEY(FID) REFERENCES FILES(FID) -- The containing file must exist in FILES
);


-- Strings in the code: each row holds the text of one string, located by
-- file and offset
CREATE TABLE STRINGS(
  FID INTEGER, -- File key
  FOFFSET INTEGER, -- Offset within the file
  STRING CHARACTER VARYING, -- The string, including its delimiters
  PRIMARY KEY(FID, FOFFSET), -- At most one string row per file position
  FOREIGN KEY(FID) REFERENCES FILES(FID) -- The containing file must exist in FILES
);


-- Remaining, non-identifier source code: each row holds one piece of code,
-- located by file and offset
CREATE TABLE REST(
  FID INTEGER, -- File key
  FOFFSET INTEGER, -- Offset within the file
  CODE CHARACTER VARYING, -- The actual code
  PRIMARY KEY(FID, FOFFSET), -- At most one code row per file position
  FOREIGN KEY(FID) REFERENCES FILES(FID) -- The containing file must exist in FILES
);


-- Line number offsets within each file: maps a file offset to a line number
CREATE TABLE LINEPOS(
  FID INTEGER, -- File key
  FOFFSET INTEGER, -- Offset within the file
  LNUM INTEGER, -- Line number (starts at 1)
  PRIMARY KEY(FID, FOFFSET), -- At most one line-number row per file position
  FOREIGN KEY(FID) REFERENCES FILES(FID) -- The containing file must exist in FILES
);


-- Projects known to the workspace, one row per project key
CREATE TABLE PROJECTS(
  PID INTEGER, -- Key that uniquely identifies the project
  NAME CHARACTER VARYING, -- Name of the project
  PRIMARY KEY(PID) -- Each project key appears at most once
);


-- Identifiers appearing in projects: associates identifier keys with project keys.
-- No primary key or uniqueness constraint is declared on the (EID, PID) pair.
CREATE TABLE IDPROJ(
  EID BIGINT, -- Identifier key
  PID INTEGER, -- Project key
  FOREIGN KEY(EID) REFERENCES IDS(EID), -- The identifier must exist in IDS
  FOREIGN KEY(PID) REFERENCES PROJECTS(PID) -- The project must exist in PROJECTS
);


-- Files used in projects: associates file keys with project keys.
-- No primary key or uniqueness constraint is declared on the (FID, PID) pair.
CREATE TABLE FILEPROJ(
  FID INTEGER, -- File key
  PID INTEGER, -- Project key
  FOREIGN KEY(FID) REFERENCES FILES(FID), -- The file must exist in FILES
  FOREIGN KEY(PID) REFERENCES PROJECTS(PID) -- The project must exist in PROJECTS
);


-- Foreign keys for the following four tables are not specified, because the
-- integrity constraints are difficult to satisfy: files (especially their
-- metrics, notably NCOPIES) can't be written until the end of processing,
-- whereas, to conserve space, these tables are written after each file is
-- processed. Alternatively, inserts into these tables could be wrapped in
-- SET REFERENTIAL_INTEGRITY { TRUE | FALSE } calls.


-- Included files defining required elements for a given compilation unit and project.
-- The table declares no keys or constraints; the commented-out foreign keys
-- below record the intended relationships, which are deliberately not enforced.
CREATE TABLE DEFINERS(
  PID INTEGER, -- Project key
  CUID INTEGER, -- Compilation unit key
  BASEFILEID INTEGER, -- File (often .c) requiring (using) a definition
  DEFINERID INTEGER -- File (often .h) providing a definition
  -- FOREIGN KEY(PID) REFERENCES PROJECTS(PID),
  -- FOREIGN KEY(CUID) REFERENCES FILES(FID),
  -- FOREIGN KEY(BASEFILEID) REFERENCES FILES(FID),
  -- FOREIGN KEY(DEFINERID) REFERENCES FILES(FID)
);


-- Included files and the files that include them, for a given compilation unit and project.
-- The table declares no keys or constraints; the commented-out foreign keys
-- below record the intended relationships, which are deliberately not enforced.
CREATE TABLE INCLUDERS(
  PID INTEGER, -- Project key
  CUID INTEGER, -- Compilation unit key
  BASEFILEID INTEGER, -- File included in the compilation
  INCLUDERID INTEGER -- File that includes it
  -- FOREIGN KEY(PID) REFERENCES PROJECTS(PID),
  -- FOREIGN KEY(CUID) REFERENCES FILES(FID),
  -- FOREIGN KEY(BASEFILEID) REFERENCES FILES(FID),
  -- FOREIGN KEY(INCLUDERID) REFERENCES FILES(FID)
);


-- Included files providing code or data for a given compilation unit and project.
-- The table declares no keys or constraints; the commented-out foreign keys
-- below record the intended relationships, which are deliberately not enforced.
CREATE TABLE PROVIDERS(
  PID INTEGER, -- Project key
  CUID INTEGER, -- Compilation unit key
  PROVIDERID INTEGER -- Included file
  -- FOREIGN KEY(PID) REFERENCES PROJECTS(PID),
  -- FOREIGN KEY(CUID) REFERENCES FILES(FID),
  -- FOREIGN KEY(PROVIDERID) REFERENCES FILES(FID)
);


-- Tokens requiring file inclusion for a given compilation unit and project.
-- The table declares no keys or constraints; the commented-out foreign keys
-- below record the intended relationships, which are deliberately not enforced.
CREATE TABLE INCTRIGGERS(
  PID INTEGER, -- Project key
  CUID INTEGER, -- Compilation unit key
  BASEFILEID INTEGER, -- File requiring a definition
  DEFINERID INTEGER, -- File providing a definition
  FOFFSET INTEGER, -- Definition's offset within the providing file
  LEN INTEGER -- Token's length
  -- FOREIGN KEY(PID) REFERENCES PROJECTS(PID),
  -- FOREIGN KEY(CUID) REFERENCES FILES(FID),
  -- FOREIGN KEY(BASEFILEID) REFERENCES FILES(FID),
  -- FOREIGN KEY(DEFINERID) REFERENCES FILES(FID)
);


-- C functions and function-like macros, one row per function identifier
CREATE TABLE FUNCTIONS(
  ID BIGINT, -- Key that uniquely identifies the function
  NAME CHARACTER VARYING, -- Name of the function (redundant; see FUNCTIONID)
  ISMACRO BOOLEAN, -- Set for a function-like macro, clear for a C function
  DEFINED BOOLEAN, -- Set when the workspace contains the function's definition
  DECLARED BOOLEAN, -- Set when the workspace contains a declaration of the function
  FILESCOPED BOOLEAN, -- Set when the function's scope is a single compilation unit (static or macro)
  FID INTEGER, -- Key of the file holding the function's definition, declaration, or use
  FOFFSET INTEGER, -- Offset within that file of the definition, declaration, or use
  FANIN INTEGER, -- Fan-in (number of callers)
  PRIMARY KEY(ID), -- Each function identifier appears at most once
  FOREIGN KEY(FID) REFERENCES FILES(FID) -- The referenced file must exist in FILES
);


-- Where each defined function or macro begins and ends, one row per function
CREATE TABLE FUNCTIONDEFS(
  FUNCTIONID BIGINT, -- Key of the function being described
  FIDBEGIN INTEGER, -- Key of the file in which the definition begins
  FOFFSETBEGIN INTEGER, -- Offset within that file at which the definition begins
  FIDEND INTEGER, -- Key of the file in which the definition ends
  FOFFSETEND INTEGER, -- Offset within that file at which the definition ends
  PRIMARY KEY(FUNCTIONID), -- At most one definition row per function
  FOREIGN KEY(FUNCTIONID) REFERENCES FUNCTIONS(ID) -- The function must exist in FUNCTIONS
);


-- Metrics of defined functions and macros: at most one row per function for
-- each PRECPP value, i.e. one set of measurements taken before the
-- C preprocessor (cpp) and one taken after it
CREATE TABLE FUNCTIONMETRICS(
  FUNCTIONID BIGINT, -- Function identifier key
  PRECPP BOOLEAN, -- True for values before the cpp, false for values after it
  NCHAR INTEGER, -- Number of characters
  NCCOMMENT INTEGER, -- Number of comment characters
  NSPACE INTEGER, -- Number of space characters
  NLCOMMENT INTEGER, -- Number of line comments
  NBCOMMENT INTEGER, -- Number of block comments
  NLINE INTEGER, -- Number of lines
  MAXLINELEN INTEGER, -- Maximum number of characters in a line
  MAXSTMTLEN INTEGER, -- Maximum number of tokens in a statement
  MAXSTMTNEST INTEGER, -- Maximum level of statement nesting
  MAXBRACENEST INTEGER, -- Maximum level of brace nesting
  MAXBRACKNEST INTEGER, -- Maximum level of bracket nesting
  BRACENEST INTEGER, -- Dangling brace nesting
  BRACKNEST INTEGER, -- Dangling bracket nesting
  NULINE INTEGER, -- Number of unprocessed lines
  NPPDIRECTIVE INTEGER, -- Number of C preprocessor directives
  NPPCOND INTEGER, -- Number of processed C preprocessor conditionals (ifdef, if, elif)
  NPPFMACRO INTEGER, -- Number of defined C preprocessor function-like macros
  NPPOMACRO INTEGER, -- Number of defined C preprocessor object-like macros
  NTOKEN INTEGER, -- Number of tokens
  NSTMT INTEGER, -- Number of statements or declarations
  NOP INTEGER, -- Number of operators
  NUOP INTEGER, -- Number of unique operators
  NNCONST INTEGER, -- Number of numeric constants
  NCLIT INTEGER, -- Number of character literals
  NSTRING INTEGER, -- Number of character strings
  NPPCONCATOP INTEGER, -- Number of token concatenation operators (##)
  NPPSTRINGOP INTEGER, -- Number of token stringification operators (#)
  NIF INTEGER, -- Number of if statements
  NELSE INTEGER, -- Number of else clauses
  NSWITCH INTEGER, -- Number of switch statements
  NCASE INTEGER, -- Number of case labels
  NDEFAULT INTEGER, -- Number of default labels
  NBREAK INTEGER, -- Number of break statements
  NFOR INTEGER, -- Number of for statements
  NWHILE INTEGER, -- Number of while statements
  NDO INTEGER, -- Number of do statements
  NCONTINUE INTEGER, -- Number of continue statements
  NGOTO INTEGER, -- Number of goto statements
  NRETURN INTEGER, -- Number of return statements
  NASM INTEGER, -- Number of assembly statements
  NTYPEOF INTEGER, -- Number of typeof operators
  NPID INTEGER, -- Number of project-scope identifiers
  NFID INTEGER, -- Number of file-scope (static) identifiers
  NMID INTEGER, -- Number of macro identifiers
  NID INTEGER, -- Total number of object and object-like identifiers
  NUPID INTEGER, -- Number of unique project-scope identifiers
  NUFID INTEGER, -- Number of unique file-scope (static) identifiers
  NUMID INTEGER, -- Number of unique macro identifiers
  NUID INTEGER, -- Number of unique object and object-like identifiers
  NLABEL INTEGER, -- Number of goto labels
  NMACROEXPANDTOKEN INTEGER, -- Tokens added by macro expansion
  NGNSOC INTEGER, -- Number of global namespace occupants at function's top
  NMPARAM INTEGER, -- Number of parameters (for macros)
  NFPARAM INTEGER, -- Number of parameters (for functions)
  NEPARAM INTEGER, -- Number of passed non-expression macro parameters
  FANIN INTEGER, -- Fan-in (number of calling functions)
  FANOUT INTEGER, -- Fan-out (number of called functions)
  CCYCL1 INTEGER, -- Cyclomatic complexity (control statements)
  CCYCL2 INTEGER, -- Extended cyclomatic complexity (includes branching operators)
  CCYCL3 INTEGER, -- Maximum cyclomatic complexity (includes branching operators and all switch branches)
  CSTRUC REAL, -- Structure complexity (Henry and Kafura)
  CHAL REAL, -- Halstead volume
  IFLOW REAL, -- Information flow metric (Henry and Selig)
  PRIMARY KEY(FUNCTIONID, PRECPP), -- Composite key: function plus before/after-cpp flag
  FOREIGN KEY(FUNCTIONID) REFERENCES FUNCTIONS(ID) -- The measured function must exist in FUNCTIONS
);


-- Identifiers comprising a function's name: one row per identifier of the
-- name, ordered within the function by ORDINAL
CREATE TABLE FUNCTIONID(
  FUNCTIONID BIGINT, -- Function identifier key
  ORDINAL INTEGER, -- Position of the identifier within the function name (0-based)
  EID BIGINT, -- Identifier key
  PRIMARY KEY(FUNCTIONID, ORDINAL), -- At most one identifier per position of a function's name
  FOREIGN KEY(FUNCTIONID) REFERENCES FUNCTIONS(ID), -- The function must exist in FUNCTIONS
  FOREIGN KEY(EID) REFERENCES IDS(EID) -- The identifier must exist in IDS
);


-- Function calls: each row pairs a calling function with a called function.
-- No primary key or uniqueness constraint is declared on the (SOURCEID, DESTID) pair.
CREATE TABLE FCALLS(
  SOURCEID BIGINT, -- Calling function identifier key
  DESTID BIGINT, -- Called function identifier key
  FOREIGN KEY(SOURCEID) REFERENCES FUNCTIONS(ID), -- The caller must exist in FUNCTIONS
  FOREIGN KEY(DESTID) REFERENCES FUNCTIONS(ID) -- The callee must exist in FUNCTIONS
);


-- Files occurring in more than one copy: each row assigns a file to a group
-- of identical files
CREATE TABLE FILECOPIES(
  GROUPID INTEGER, -- File group identifier
  FID INTEGER, -- Key of file belonging to a group of identical files
  PRIMARY KEY(GROUPID, FID), -- A file is listed at most once within a group
  FOREIGN KEY(FID) REFERENCES FILES(FID) -- The file must exist in FILES
);