--- /dev/null
+#-------------------------------------------------------------------------
+#
+# Makefile.inc--
+# Build and install postgres.
+#
+# Copyright (c) 1994, Regents of the University of California
+#
+#
+# IDENTIFICATION
+# $Header$
+#
+# NOTES
+# objdir - location of the objects and generated files (eg. obj)
+#
+#-------------------------------------------------------------------------
+
+# Subdirectories descended into by the recursive rules pulled in from
+# mk/postgres.subdir.mk below.
+SUBDIR= backend libpq bin
+
+FIND = find
+# assuming gnu tar and split here
+TAR = tar
+SPLIT = split
+
+ETAGS = etags
+XARGS = xargs
+
+# libpgtcl is only built when Tcl support is enabled in Makefile.global.
+ifeq ($(USE_TCL), true)
+SUBDIR += libpgtcl
+endif
+
+include mk/postgres.subdir.mk
+
+# Rebuild the emacs TAGS file over the main source trees.  Declared
+# .PHONY below so it is regenerated on every invocation even though a
+# real TAGS file exists afterwards.
+TAGS:
+ rm -f TAGS; \
+ for i in backend libpq bin; do \
+ $(FIND) $$i -name '*.[chyl]' -print | $(XARGS) $(ETAGS) -a ; \
+ done
+
+# target to generate a backup tar file and split files that can be
+# saved to 1.44M floppy
+# (uses GNU-only options: find -not -path, tar --files-from, split --bytes)
+BACKUP:
+ rm -f BACKUP.filelist BACKUP.tgz; \
+ $(FIND) . -not -path '*obj/*' -not -path '*data/*' -type f -print > BACKUP.filelist; \
+ $(TAR) --files-from BACKUP.filelist -c -z -v -f BACKUP.tgz
+ $(SPLIT) --bytes=1400k BACKUP.tgz pgBACKUP.
+
+.PHONY: TAGS
+.PHONY: BACKUP
--- /dev/null
+#-------------------------------------------------------------------------
+#
+# Makefile.global--
+# global configuration for the Makefiles
+#
+# Copyright (c) 1994, Regents of the University of California
+#
+#
+# IDENTIFICATION
+# $Header$
+#
+# NOTES
+# This is seen by any Makefiles that include mk/postgres.mk. To
+# override the default setting, create a Makefile.custom in this
+# directory and put your defines there. (Makefile.custom is included
+# at the end of this file.)
+#
+# If you change any of these defines you probably have to
+# gmake clean; gmake
+# since no dependencies are created for these. (of course you can
+# be crafty and check what files really depend on them and just remake
+# those).
+#
+#-------------------------------------------------------------------------
+
+
+##############################################################################
+#
+# CONFIGURATION SECTION
+#
+# Following are settings pertaining to the postgres build and
+# installation. The most important one is obviously the name
+# of the port.
+
+# The name of the port. Valid choices are:
+# alpha - DEC Alpha AXP on OSF/1 2.0
+# hpux - HP PA-RISC on HP-UX 9.0
+# sparc_solaris - SUN SPARC on Solaris 2.4
+# sparc - SUN SPARC on SunOS 4.1.3
+# ultrix4 - DEC MIPS on Ultrix 4.4
+# linux - Intel x86 on Linux 1.2 and Linux ELF
+# (For non-ELF Linux, you need to comment out
+# "LINUX_ELF=1" in src/mk/port/postgres.mk.linux)
+# BSD44_derived - OSs derived from 4.4-lite BSD (NetBSD, FreeBSD)
+# bsdi - BSD/OS 2.0 and 2.01
+# aix - IBM on AIX 3.2.5
+# irix5 - SGI MIPS on IRIX 5.3
+# Some hooks are provided for
+# svr4 - Intel x86 on Intel SVR4
+# next - Motorola MC68K or Intel x86 on NeXTSTEP 3.2
+# but these are guaranteed not to work as of yet.
+#
+# XXX Note that you MUST set PORTNAME here (or on the command line) so
+# that port-dependent variables are correctly set within this file.
+# Makefile.custom does not take effect (for ifeq purposes)
+# until after this file is processed!
+# make sure that you have no whitespaces after the PORTNAME setting
+# or the makefiles can get confused
+# NB: keep the value free of trailing whitespace -- see the warning above.
+PORTNAME= alpha
+
+# POSTGRESLOGIN is the login name of the user who gets special
+# privileges within the database. By default it is "postgres", but
+# you can change it to any existing login name (such as your own
+# login if you are compiling a private version or don't have root
+# access).
+POSTGRESLOGIN= postgres
+
+# For convenience, POSTGRESDIR is where DATADIR, BINDIR, and LIBDIR
+# and other target destinations are rooted. Of course, each of these is
+# changeable separately.
+POSTGRESDIR= /private/postgres95
+
+# SRCDIR specifies where the source files are.
+SRCDIR= $(POSTGRESDIR)/src
+
+# DATADIR specifies where the postmaster expects to find its database.
+# This may be overridden by command line options or the PGDATA environment
+# variable.
+DATADIR= $(POSTGRESDIR)/data
+
+# Where the postgres executables live (changeable by just putting them
+# somewhere else and putting that directory in your shell PATH)
+BINDIR= $(POSTGRESDIR)/bin
+
+# Where libpq.a gets installed. You must put it where your loader will
+# look for it if you wish to use the -lpq convention. Otherwise you
+# can just put the absolute pathname to the library at the end of your
+# command line.
+LIBDIR= $(POSTGRESDIR)/lib
+
+# This is the directory where IPC utilities ipcs and ipcrm are located
+#
+IPCSDIR= /usr/bin
+
+# Where the man pages (suitable for use with "man") get installed.
+POSTMANDIR= $(POSTGRESDIR)/man
+
+# Where the formatted documents (e.g., the reference manual) get installed.
+POSTDOCDIR= $(POSTGRESDIR)/doc
+
+# Where the header files necessary to build frontend programs get installed.
+HEADERDIR= $(POSTGRESDIR)/include
+
+# NAMEDATALEN is the max length for system identifiers (e.g. table names,
+# attribute names, function names, etc.)
+#
+# These MUST be set here. DO NOT COMMENT THESE OUT
+# Setting these too high will result in excess space usage for system catalogs
+# Setting them too low will make the system unusable.
+# values between 16 and 64 that are multiples of four are recommended.
+#
+# NOTE also that databases with different NAMEDATALEN's cannot interoperate!
+#
+NAMEDATALEN = 32
+# OIDNAMELEN should be set to NAMEDATALEN + sizeof(Oid)
+OIDNAMELEN = 36
+
+# Propagate the identifier length limits to every compilation unit.
+CFLAGS+= -DNAMEDATALEN=$(NAMEDATALEN) -DOIDNAMELEN=$(OIDNAMELEN)
+
+##############################################################################
+#
+# FEATURES
+#
+# To disable a feature, comment out the entire definition
+# (that is, prepend '#', don't set it to "0" or "no").
+
+# Comment out ENFORCE_ALIGNMENT if you do NOT want unaligned access to
+# multi-byte types to generate a bus error.
+ENFORCE_ALIGNMENT= true
+
+# Comment out CDEBUG to turn off debugging and sanity-checking.
+#
+# XXX on MIPS, use -g3 if you want to compile with -O
+CDEBUG= -g
+
+# turn this on if you prefer European style dates instead of American
+# style dates
+# EUROPEAN_DATES = 1
+
+# Comment out PROFILE to disable profiling.
+#
+# XXX define on MIPS if you want to be able to use pixie.
+# note that this disables dynamic loading!
+#PROFILE= -p -non_shared
+
+# About the use of readline in psql:
+# psql does not require the GNU readline and history libraries. Hence, we
+# do not compile with them by default. However, there are hooks in the
+# program which support the use of GNU readline and history. Should you
+# decide to use them, change USE_READLINE to true and change READLINE_INCDIR
+# and READLINE_LIBDIR to reflect the location of the readline and history
+# headers and libraries.
+#
+#USE_READLINE= true
+
+# directories for the readline and history libraries.
+READLINE_INCDIR= /usr/local/include
+HISTORY_INCDIR= /usr/local/include
+READLINE_LIBDIR= /usr/local/lib
+HISTORY_LIBDIR= /usr/local/lib
+
+# If you do not plan to use Host based authentication,
+# comment out the following line
+# (note: "ifdef" below only tests that HBA has a non-empty value, so
+# setting it to 0 would still enable it -- comment it out instead)
+HBA = 1
+
+ifdef HBA
+HBAFLAGS= -DHBA
+endif
+
+
+
+# If you plan to use Kerberos for authentication...
+#
+# Comment out KRBVERS if you do not use Kerberos.
+# Set KRBVERS to "4" for Kerberos v4, "5" for Kerberos v5.
+# XXX Edit the default Kerberos variables below!
+#
+#KRBVERS= 5
+
+
+# Globally pass Kerberos file locations.
+# these are used in the postmaster and all libpq applications.
+#
+# Adjust KRBINCS and KRBLIBS to reflect where you have Kerberos
+# include files and libraries installed.
+# PG_KRB_SRVNAM is the name under which POSTGRES is registered in
+# the Kerberos database (KDC).
+# PG_KRB_SRVTAB is the location of the server's keytab file.
+#
+ifdef KRBVERS
+KRBINCS= -I/usr/athena/include
+KRBLIBS= -L/usr/athena/lib
+KRBFLAGS+= $(KRBINCS) -DPG_KRB_SRVNAM='"postgres_dbms"'
+ ifeq ($(KRBVERS), 4)
+KRBFLAGS+= -DKRB4
+KRBFLAGS+= -DPG_KRB_SRVTAB='"/etc/srvtab"'
+KRBLIBS+= -lkrb -ldes
+ else
+ ifeq ($(KRBVERS), 5)
+KRBFLAGS+= -DKRB5
+KRBFLAGS+= -DPG_KRB_SRVTAB='"FILE:/krb5/srvtab.postgres"'
+KRBLIBS+= -lkrb5 -lcrypto -lcom_err -lisode
+ endif
+ endif
+endif
+
+#
+# location of Tcl/Tk headers and libraries
+#
+# Uncomment this to build the tcl utilities.
+USE_TCL= true
+# customize these to your site's needs
+#
+TCL_INCDIR= /usr/local/devel/tcl7.4/include
+TCL_LIBDIR= /usr/local/devel/tcl7.4/lib
+TCL_LIB = -ltcl7.4
+TK_INCDIR= /usr/local/devel/tk4.0/include
+TK_LIBDIR= /usr/local/devel/tk4.0/lib
+TK_LIB = -ltk4.0
+
+#
+# include port specific rules and variables. For instance:
+#
+# signal(2) handling - this is here because it affects some of
+# the frontend commands as well as the backend server.
+#
+# Ultrix and SunOS provide BSD signal(2) semantics by default.
+#
+# SVID2 and POSIX signal(2) semantics differ from BSD signal(2)
+# semantics. We can use the POSIX sigaction(2) on systems that
+# allow us to request restartable signals (SA_RESTART).
+#
+# Some systems don't allow restartable signals at all unless we
+# link to a special BSD library.
+#
+# We devoutly hope that there aren't any systems that provide
+# neither POSIX signals nor BSD signals. The alternative
+# is to do signal-handler reinstallation, which doesn't work well
+# at all.
+#
+# The leading '-' tells make to continue silently if the port file
+# is missing (e.g. a misspelled PORTNAME) rather than aborting.
+-include $(MKDIR)/port/postgres.mk.$(PORTNAME)
+
+##############################################################################
+#
+# Flags for CC and LD. (depend on CDEBUG and PROFILE)
+#
+
+# Globally pass debugging/optimization/profiling flags based
+# on the options selected above.
+ifdef CDEBUG
+ CFLAGS+= $(CDEBUG)
+ LDFLAGS+= $(CDEBUG)
+else
+ ifndef CFLAGS_OPT
+ CFLAGS_OPT= -O
+ endif
+ CFLAGS+= $(CFLAGS_OPT)
+#
+# Uncommenting this will make things go a LOT faster, but you will
+# also lose a lot of useful error-checking.
+#
+# NOTE(review): despite the comment above, the next line is NOT
+# commented out -- assertion checking is disabled whenever CDEBUG is
+# unset. Confirm this is the intended default for optimized builds.
+ CFLAGS+= -DNO_ASSERT_CHECKING
+endif
+
+ifdef PROFILE
+CFLAGS+= $(PROFILE)
+LDFLAGS+= $(PROFILE)
+endif
+
+# Globally pass PORTNAME
+CFLAGS+= -DPORTNAME_$(PORTNAME)
+
+# Globally pass the default TCP port for postmaster(1).
+CFLAGS+= -DPOSTPORT='"5432"'
+
+# include flags from mk/port/postgres.mk.$(PORTNAME)
+CFLAGS+= $(CFLAGS_BE)
+LDADD+= $(LDADD_BE)
+LDFLAGS+= $(LDFLAGS_BE)
+
+
+##############################################################################
+#
+# Miscellaneous configuration
+#
+
+# This is the time, in seconds, at which a given backend server
+# will wait on a lock before deciding to abort the transaction
+# (this is what we do in lieu of deadlock detection).
+#
+# Low numbers are not recommended as they will tend to cause
+# false aborts if many transactions are long-lived.
+CFLAGS+= -DDEADLOCK_TIMEOUT=60
+
+srcdir= $(SRCDIR)
+includedir= $(HEADERDIR)
+objdir= obj
+
+
+##############################################################################
+#
+# Customization.
+#
+# Optional per-site overrides; silently skipped when absent.
+-include $(MKDIR)/../Makefile.custom
+
+
--- /dev/null
+#-------------------------------------------------------------------------
+#
+# Makefile--
+# Makefile for the postgres backend (and the postmaster)
+#
+# Copyright (c) 1994, Regents of the University of California
+#
+#
+# IDENTIFICATION
+# $Header$
+#
+#-------------------------------------------------------------------------
+
+#
+# The following turns on intermediate linking of partial objects to speed
+# the link cycle during development. (To turn this off, put "BIGOBJS=false"
+# in your custom makefile, ../Makefile.custom.)
+BIGOBJS= true
+
+
+PROG= postgres
+
+MKDIR= ../mk
+include $(MKDIR)/postgres.mk
+
+
+# Each subdirectory's Makefile.inc appends its sources to a SRCS_* list
+# and adjusts VPATH; they are all merged into SRCS below.
+include $(CURDIR)/access/Makefile.inc
+include $(CURDIR)/bootstrap/Makefile.inc
+include $(CURDIR)/catalog/Makefile.inc
+include $(CURDIR)/commands/Makefile.inc
+include $(CURDIR)/executor/Makefile.inc
+include $(CURDIR)/include/Makefile.inc
+include $(CURDIR)/lib/Makefile.inc
+include $(CURDIR)/libpq/Makefile.inc
+include $(CURDIR)/main/Makefile.inc
+include $(CURDIR)/nodes/Makefile.inc
+include $(CURDIR)/optimizer/Makefile.inc
+include $(CURDIR)/parser/Makefile.inc
+include $(CURDIR)/port/Makefile.inc
+include $(CURDIR)/postmaster/Makefile.inc
+include $(CURDIR)/regex/Makefile.inc
+include $(CURDIR)/rewrite/Makefile.inc
+include $(CURDIR)/storage/Makefile.inc
+include $(CURDIR)/tcop/Makefile.inc
+include $(CURDIR)/tioga/Makefile.inc
+include $(CURDIR)/utils/Makefile.inc
+
+SRCS:= ${SRCS_ACCESS} ${SRCS_BOOTSTRAP} $(SRCS_CATALOG) ${SRCS_COMMANDS} \
+ ${SRCS_EXECUTOR} $(SRCS_LIB) $(SRCS_LIBPQ) ${SRCS_MAIN} \
+ ${SRCS_NODES} ${SRCS_OPTIMIZER} ${SRCS_PARSER} ${SRCS_PORT} \
+ $(SRCS_POSTMASTER) ${SRCS_REGEX} ${SRCS_REWRITE} ${SRCS_STORAGE} \
+ ${SRCS_TCOP} ${SRCS_UTILS}
+
+# With BIGOBJS, link against per-module partial objects (built further
+# below with $(make_partial)); otherwise link every .o individually.
+ifeq ($(BIGOBJS), true)
+OBJS= ACCESS.o BOOTSTRAP.o COMMANDS.o EXECUTOR.o MAIN.o MISC.o NODES.o \
+ PARSER.o OPTIMIZER.o REGEX.o REWRITE.o STORAGE.o TCOP.o UTILS.o
+CLEANFILES+= $(subst .s,.o,$(SRCS:.c=.o)) $(OBJS)
+else
+OBJS:= $(subst .s,.o,$(SRCS:%.c=$(objdir)/%.o))
+CLEANFILES+= $(notdir $(OBJS))
+endif
+
+#############################################################################
+#
+# TIOGA stuff
+#
+ifdef TIOGA
+SRCS+= $(SRCS_TIOGA)
+ ifeq ($(BIGOBJS), true)
+TIOGA.o: $(SRCS_TIOGA:%.c=$(objdir)/%.o)
+ $(make_partial)
+OBJS+= TIOGA.o
+CLEANFILES+= $(SRCS_TIOGA:%.c=%.o) TIOGA.o
+ else
+OBJS+= $(SRCS_TIOGA:%.c=$(objdir)/%.o)
+ endif
+endif
+
+
+#############################################################################
+#
+# Compiling the postgres backend.
+#
+# Bake the install-time directory locations into the backend binary and
+# put the source tree, generated headers, and port directory on the
+# include path.
+CFLAGS+= -DPOSTGRESDIR='"$(POSTGRESDIR)"' \
+ -DPGDATADIR='"$(DATADIR)"' \
+ -I$(CURDIR)/. -I$(CURDIR)/$(objdir) \
+ -I$(CURDIR)/include \
+ -I$(CURDIR)/port/$(PORTNAME)
+
+# turn this on if you prefer European style dates instead of American
+# style dates
+ifdef EUROPEAN_DATES
+CFLAGS += -DEUROPEAN_STYLE
+endif
+
+# kerberos flags
+ifdef KRBVERS
+CFLAGS+= $(KRBFLAGS)
+LDADD+= $(KRBLIBS)
+endif
+
+# host based access flags
+ifdef HBA
+CFLAGS+= $(HBAFLAGS)
+endif
+
+
+
+#
+# All systems except NEXTSTEP require the math library.
+# Loader flags for system-dependent libraries are appended in
+# src/backend/port/$(PORTNAME)/Makefile.inc
+#
+ifneq ($(PORTNAME), next)
+LDADD+= -lm
+endif
+
+# statically link in libc for linux
+ifeq ($(PORTNAME), linux)
+LDADD+= -lc
+endif
+
+# Link the backend from the objects collected above. The objects are
+# addressed by their basenames inside $(objdir), matching where the
+# compile rules deposit them.
+postgres: $(POSTGRES_DEPEND) $(OBJS) $(EXPORTS)
+ $(CC) $(LDFLAGS) -o $(objdir)/$(@F) $(addprefix $(objdir)/,$(notdir $(OBJS))) $(LDADD)
+
+# Make this target first if you are doing a parallel make.
+# The targets in 'first' need to be made sequentially because of dependencies.
+# Then, you can make 'all' with parallelism turned on.
+first: $(POSTGRES_DEPEND)
+
+
+#############################################################################
+#
+# Partial objects for platforms with slow linkers.
+#
+# Map each module's source list onto per-module object lists, then
+# combine each list into a single relocatable object via $(make_partial)
+# (defined in the mk/ infrastructure). MAIN.o and MISC.o each fold in
+# more than one module; OBJS_PORT also handles .s assembler sources.
+ifeq ($(BIGOBJS), true)
+
+OBJS_ACCESS:= $(SRCS_ACCESS:%.c=$(objdir)/%.o)
+OBJS_BOOTSTRAP:= $(SRCS_BOOTSTRAP:%.c=$(objdir)/%.o)
+OBJS_CATALOG:= $(SRCS_CATALOG:%.c=$(objdir)/%.o)
+OBJS_COMMANDS:= $(SRCS_COMMANDS:%.c=$(objdir)/%.o)
+OBJS_EXECUTOR:= $(SRCS_EXECUTOR:%.c=$(objdir)/%.o)
+OBJS_MAIN:= $(SRCS_MAIN:%.c=$(objdir)/%.o)
+OBJS_POSTMASTER:= $(SRCS_POSTMASTER:%.c=$(objdir)/%.o)
+OBJS_LIB:= $(SRCS_LIB:%.c=$(objdir)/%.o)
+OBJS_LIBPQ:= $(SRCS_LIBPQ:%.c=$(objdir)/%.o)
+OBJS_PORT:= $(addprefix $(objdir)/,$(subst .s,.o,$(SRCS_PORT:.c=.o)))
+OBJS_NODES:= $(SRCS_NODES:%.c=$(objdir)/%.o)
+OBJS_PARSER:= $(SRCS_PARSER:%.c=$(objdir)/%.o)
+OBJS_OPTIMIZER:= $(SRCS_OPTIMIZER:%.c=$(objdir)/%.o)
+OBJS_REGEX:= $(SRCS_REGEX:%.c=$(objdir)/%.o)
+OBJS_REWRITE:= $(SRCS_REWRITE:%.c=$(objdir)/%.o)
+OBJS_STORAGE:= $(SRCS_STORAGE:%.c=$(objdir)/%.o)
+OBJS_TCOP:= $(SRCS_TCOP:%.c=$(objdir)/%.o)
+OBJS_UTILS:= $(SRCS_UTILS:%.c=$(objdir)/%.o)
+
+ACCESS.o: $(OBJS_ACCESS)
+ $(make_partial)
+BOOTSTRAP.o: $(OBJS_BOOTSTRAP)
+ $(make_partial)
+COMMANDS.o: $(OBJS_COMMANDS)
+ $(make_partial)
+EXECUTOR.o: $(OBJS_EXECUTOR)
+ $(make_partial)
+MAIN.o: $(OBJS_MAIN) $(OBJS_POSTMASTER)
+ $(make_partial)
+MISC.o: $(OBJS_CATALOG) $(OBJS_LIB) $(OBJS_LIBPQ) $(OBJS_PORT)
+ $(make_partial)
+NODES.o: $(OBJS_NODES)
+ $(make_partial)
+PARSER.o: $(OBJS_PARSER)
+ $(make_partial)
+OPTIMIZER.o: $(OBJS_OPTIMIZER)
+ $(make_partial)
+REGEX.o: $(OBJS_REGEX)
+ $(make_partial)
+REWRITE.o: $(OBJS_REWRITE)
+ $(make_partial)
+STORAGE.o: $(OBJS_STORAGE)
+ $(make_partial)
+TCOP.o: $(OBJS_TCOP)
+ $(make_partial)
+UTILS.o: $(OBJS_UTILS)
+ $(make_partial)
+endif
+
+#############################################################################
+#
+# Installation.
+#
+# Install the bki files to the data directory. We also copy a version
+# of them that has "PGUID" intact, so one can change the value of the
+# postgres userid before running initdb in the case of customizing the
+# binary release (i.e., fixing up PGUID w/o recompiling the system).
+# Those files are copied out as foo.source. The program newbki(1) can
+# be run later to reset the postgres login id (but it must be run before
+# initdb is run, or after clearing the data directory with
+# cleardbdir(1)). [newbki distributed with v4r2 but not with Postgres95.]
+#
+
+# NAMEDATALEN=`egrep "^#define NAMEDATALEN" $(CURDIR)/include/postgres.h | awk '{print $$3}'`; \
+# OIDNAMELEN=`egrep "^#define OIDNAMELEN" $(CURDIR)/include/postgres.h | awk '{print $$3}'`; \
+
+# Install the backend, create the postmaster symlink, and install the
+# bki files with PGUID/NAMEDATALEN/OIDNAMELEN substituted (plus a
+# ".source" copy with PGUID left intact -- see the comment above).
+# NOTE(review): the pg_hba lines at the end copy into $(DATADIR)
+# without the $(DESTDIR) prefix used everywhere else in this target --
+# looks like staged installs would miss pg_hba; confirm.
+install: beforeinstall pg_id $(BKIFILES) postgres
+ $(INSTALL) $(INSTL_EXE_OPTS) $(objdir)/postgres $(DESTDIR)$(BINDIR)/postgres
+ @rm -f $(DESTDIR)$(BINDIR)/postmaster
+ cd $(DESTDIR)$(BINDIR); ln -s postgres postmaster
+ @cd $(objdir); \
+ PG_UID=`./pg_id $(POSTGRESLOGIN)`; \
+ POSTGRESLOGIN=$(POSTGRESLOGIN);\
+ echo "NAMEDATALEN = $(NAMEDATALEN)"; \
+ echo "OIDNAMELEN = $(OIDNAMELEN)"; \
+ case $$PG_UID in "NOUSER") \
+ echo "Warning: no account named $(POSTGRESLOGIN), using yours";\
+ POSTGRESLOGIN=`whoami`; \
+ PG_UID=`./pg_id`;; \
+ esac ;\
+ for bki in $(BKIFILES); do \
+ sed \
+ -e "s/postgres PGUID/$$POSTGRESLOGIN $$PG_UID/" \
+ -e "s/NAMEDATALEN/$(NAMEDATALEN)/g" \
+ -e "s/OIDNAMELEN/$(OIDNAMELEN)/g" \
+ -e "s/PGUID/$$PG_UID/" \
+ < $$bki > $$bki.sed ; \
+ echo "Installing $(DESTDIR)$(DATADIR)/files/$$bki."; \
+ $(INSTALL) $(INSTLOPTS) \
+ $$bki.sed $(DESTDIR)$(DATADIR)/files/$$bki; \
+ rm -f $$bki.sed; \
+ echo "Installing $(DESTDIR)$(DATADIR)/files/$$bki.source."; \
+ $(INSTALL) $(INSTLOPTS) \
+ $$bki $(DESTDIR)$(DATADIR)/files/$$bki.source; \
+ done;
+ @echo "Installing $(DATADIR)/pg_hba";
+ @cp $(srcdir)/libpq/pg_hba $(DATADIR)
+ @chmod 644 $(DATADIR)/pg_hba
+
+
+# so we can get the UID of the postgres owner (w/o moving pg_id to
+# src/tools). We just want the vanilla LDFLAGS for pg_id
+# (IDLDFLAGS is snapshotted with := before the port-specific additions)
+IDLDFLAGS:= $(LDFLAGS)
+ifeq ($(PORTNAME), hpux)
+ifeq ($(CC), cc)
+IDLDFLAGS+= -Aa -D_HPUX_SOURCE
+endif
+endif
+pg_id: $(srcdir)/bin/pg_id/pg_id.c
+ $(CC) $(IDLDFLAGS) -o $(objdir)/$(@F) $<
+
+CLEANFILES+= pg_id postgres
+
+
+#############################################################################
+#
+# Support for code development.
+#
+
+#
+# Build the file, "./ID", used by the "gid" (grep-for-identifier) tool
+#
+IDFILE= ID
+.PHONY: $(IDFILE)
+$(IDFILE):
+ $(CURDIR)/makeID $(PORTNAME)
+
+#
+# Special rule to generate cpp'd version of a .c file. This is
+# especially useful given all the hellish macro processing going on.
+# The cpp'd version has a .cpp suffix. To create foo.cpp from foo.c, just
+# type
+# gmake foo.cpp
+# (NOTE(review): this comment used to advertise a ".C" suffix and
+# "bmake foo.C"; the rule below actually builds %.cpp into $(objdir).)
+#
+%.cpp: %.c
+ $(CC) -E $(CFLAGS) $(<:.C=.c) | cat -s | cb | tr -s '\012*' '\012' > $(objdir)/$(@F)
+
+cppall: $(SRCS:.c=.cpp)
+
+#
+# To use Purify (SunOS only), define PURIFY to be the path (and
+# options) with which to invoke the Purify loader. Only the executable
+# needs to be loaded with Purify.
+#
+# PURIFY = /usr/sww/bin/purify -cache-dir=/usr/local/postgres/src/backend/purify-cache
+#.if defined(PURIFY)
+#${PROG}: $(POSTGRES_DEPEND) $(OBJS) $(EXPORTS)
+# ${PURIFY} ${CC} ${LDFLAGS} -o $(objdir)/$(@F) $(addprefix $(objdir)/,$(notdir $(OBJS))) $(LDADD)
+#
+#CLEANFILES+= .purify* .pure .lock.*.o *_pure_*.o *.pure_*link*
+#.endif
+
--- /dev/null
+#-------------------------------------------------------------------------
+#
+# Makefile.inc--
+# Makefile for the access methods module
+#
+# Copyright (c) 1994, Regents of the University of California
+#
+#
+# IDENTIFICATION
+# $Header$
+#
+#-------------------------------------------------------------------------
+
+accdir=$(CURDIR)/access
+# Let make find sources in every access-method subdirectory.
+VPATH:=$(VPATH):$(accdir):\
+ $(accdir)/common:$(accdir)/hash:$(accdir)/heap:$(accdir)/index:\
+ $(accdir)/rtree:$(accdir)/nbtree:$(accdir)/transam
+
+
+# Reset SUBSRCS, let each sub-include append its sources, then snapshot
+# the accumulated list into SRCS_ACCESS (':=' so later SUBSRCS resets by
+# sibling modules do not affect it).
+SUBSRCS=
+include $(accdir)/common/Makefile.inc
+include $(accdir)/hash/Makefile.inc
+include $(accdir)/heap/Makefile.inc
+include $(accdir)/index/Makefile.inc
+include $(accdir)/rtree/Makefile.inc
+include $(accdir)/nbtree/Makefile.inc
+include $(accdir)/transam/Makefile.inc
+SRCS_ACCESS:= $(SUBSRCS)
+
+HEADERS+= attnum.h funcindex.h genam.h hash.h \
+ heapam.h hio.h htup.h ibit.h iqual.h istrat.h \
+ itup.h nbtree.h printtup.h relscan.h rtree.h \
+ sdir.h skey.h strat.h transam.h tupdesc.h tupmacs.h \
+ valid.h xact.h
+
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * attnum.h--
+ * POSTGRES attribute number definitions.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id$
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef ATTNUM_H
+#define ATTNUM_H
+
+#include "c.h"
+
+/*
+ * user defined attribute numbers start at 1. -ay 2/95
+ * (0 is reserved as InvalidAttrNumber; system attributes use
+ * negative numbers.)
+ */
+typedef int16 AttrNumber;
+
+#define InvalidAttrNumber 0
+
+/* ----------------
+ * support macros
+ * ----------------
+ */
+/*
+ * AttributeNumberIsValid --
+ * True iff the attribute number is valid.
+ */
+#define AttributeNumberIsValid(attributeNumber) \
+ ((bool) ((attributeNumber) != InvalidAttrNumber))
+
+/*
+ * AttrNumberIsForUserDefinedAttr --
+ * True iff the attribute number corresponds to a user-defined attribute.
+ */
+#define AttrNumberIsForUserDefinedAttr(attributeNumber) \
+ ((bool) ((attributeNumber) > 0))
+
+/*
+ * AttrNumberGetAttrOffset --
+ * Returns the (zero-based) attribute offset for an attribute number.
+ *
+ * Note:
+ * Assumes the attribute number is for a user-defined attribute.
+ * Evaluates attNum more than once -- do not pass an expression
+ * with side effects. Yields 0 when the assertion fails.
+ */
+#define AttrNumberGetAttrOffset(attNum) \
+ (AssertMacro(AttrNumberIsForUserDefinedAttr(attNum)) ? \
+ ((attNum - 1)) : 0)
+
+/*
+ * AttributeOffsetGetAttributeNumber --
+ * Returns the attribute number for a (zero-based) attribute offset.
+ */
+#define AttrOffsetGetAttrNumber(attributeOffset) \
+ ((AttrNumber) (1 + attributeOffset))
+
+#endif /* ATTNUM_H */
--- /dev/null
+#-------------------------------------------------------------------------
+#
+# Makefile.inc--
+# Makefile for access/common
+#
+# Copyright (c) 1994, Regents of the University of California
+#
+#
+# IDENTIFICATION
+# $Header$
+#
+#-------------------------------------------------------------------------
+
+# Appended to the SUBSRCS list that access/Makefile.inc aggregates
+# into SRCS_ACCESS.
+SUBSRCS+= heaptuple.c heapvalid.c indextuple.c indexvalid.c printtup.c \
+ scankey.c tupdesc.c
+
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * heaptuple.c--
+ * This file contains heap tuple accessor and mutator routines, as well
+ * as a few various tuple utilities.
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $Header$
+ *
+ * NOTES
+ * The old interface functions have been converted to macros
+ * and moved to heapam.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#include <string.h>
+
+#include "postgres.h"
+
+#include "access/htup.h"
+#include "access/itup.h"
+#include "access/tupmacs.h"
+#include "access/skey.h"
+#include "storage/ipc.h"
+#include "storage/buf.h"
+#include "storage/bufmgr.h"
+#include "access/transam.h"
+#include "storage/bufpage.h" /* for MAXTUPLEN */
+#include "storage/itemptr.h"
+#include "utils/memutils.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/rel.h"
+#include "utils/nabstime.h"
+
+/* this is so the sparcstation debugger works */
+
+#ifndef NO_ASSERT_CHECKING
+#ifdef sparc
+#define register
+#endif /* sparc */
+#endif /* NO_ASSERT_CHECKING */
+
+/* ----------------------------------------------------------------
+ * misc support routines
+ * ----------------------------------------------------------------
+ */
+
+/* ----------------
+ * ComputeDataSize
+ *
+ * Returns the number of bytes needed for the data portion of a tuple
+ * described by tupleDesc holding the given values. nulls[i] == ' '
+ * marks a present (non-null) attribute; any other character marks a
+ * null, which contributes no space. Alignment decisions mirror
+ * DataFill below, so the two must be kept in sync.
+ * ----------------
+ */
+Size
+ComputeDataSize(TupleDesc tupleDesc,
+ Datum value[],
+ char nulls[])
+{
+ uint32 length;
+ int i;
+ int numberOfAttributes = tupleDesc->natts;
+ AttributeTupleForm *att = tupleDesc->attrs;
+
+ for (length = 0, i = 0; i < numberOfAttributes; i++) {
+ if (nulls[i] != ' ') continue; /* nulls occupy no data bytes */
+
+ switch (att[i]->attlen) {
+ case -1: /* varlena: align, then count the full on-disk size */
+ /*
+ * This is the size of the disk representation and so
+ * must include the additional sizeof long.
+ */
+ if (att[i]->attalign == 'd') {
+ length = DOUBLEALIGN(length)
+ + VARSIZE(DatumGetPointer(value[i]));
+ } else {
+ length = INTALIGN(length)
+ + VARSIZE(DatumGetPointer(value[i]));
+ }
+ break;
+ case sizeof(char): /* 1-byte types are packed unaligned */
+ length++;
+ break;
+ case sizeof(short):
+ length = SHORTALIGN(length + sizeof(short));
+ break;
+ case sizeof(int32):
+ length = INTALIGN(length + sizeof(int32));
+ break;
+ default:
+ /* fixed-size pass-by-reference types; anything smaller
+ * than int32 (other than the cases above) is an error */
+ if (att[i]->attlen < sizeof(int32))
+ elog(WARN, "ComputeDataSize: attribute %d has len %d",
+ i, att[i]->attlen);
+ if (att[i]->attalign == 'd')
+ length = DOUBLEALIGN(length) + att[i]->attlen;
+ else
+ length = LONGALIGN(length) + att[i]->attlen;
+ break;
+ }
+ }
+
+ return length;
+}
+
+/* ----------------
+ * DataFill
+ *
+ * Copy the attribute values into the tuple data area at 'data',
+ * building the null bitmap in bit[] (a set bit means the attribute
+ * is present) and setting HEAP_HASNULL / HEAP_HASVARLENA in
+ * *infomask as appropriate. Layout must match ComputeDataSize
+ * above. When bit is NULL, no bitmap is built and the nulls[]
+ * array is not consulted (every attribute is stored).
+ * ----------------
+ */
+void
+DataFill(char *data,
+ TupleDesc tupleDesc,
+ Datum value[],
+ char nulls[],
+ char *infomask,
+ bits8 bit[])
+{
+ bits8 *bitP;
+ int bitmask;
+ uint32 length;
+ int i;
+ int numberOfAttributes = tupleDesc->natts;
+ AttributeTupleForm* att = tupleDesc->attrs;
+
+ if (bit != NULL) {
+ /* start one byte before bit[0]; the CSIGNBIT sentinel forces the
+ * first loop iteration to advance to bit[0] and clear it */
+ bitP = &bit[-1];
+ bitmask = CSIGNBIT;
+ }
+
+ *infomask = 0;
+
+ for (i = 0; i < numberOfAttributes; i++) {
+ if (bit != NULL) {
+ /* advance the bitmap cursor, clearing each new byte */
+ if (bitmask != CSIGNBIT) {
+ bitmask <<= 1;
+ } else {
+ bitP += 1;
+ *bitP = 0x0;
+ bitmask = 1;
+ }
+
+ /* null attribute: leave its bit 0 and store no data */
+ if (nulls[i] == 'n') {
+ *infomask |= HEAP_HASNULL;
+ continue;
+ }
+
+ *bitP |= bitmask;
+ }
+
+ switch (att[i]->attlen) {
+ case -1: /* varlena: align and copy the whole on-disk value */
+ *infomask |= HEAP_HASVARLENA;
+ if (att[i]->attalign=='d') {
+ data = (char *) DOUBLEALIGN(data);
+ } else {
+ data = (char *) INTALIGN(data);
+ }
+ length = VARSIZE(DatumGetPointer(value[i]));
+ memmove(data, DatumGetPointer(value[i]),length);
+ data += length;
+ break;
+ case sizeof(char): /* unaligned single byte */
+ *data = att[i]->attbyval ?
+ DatumGetChar(value[i]) : *((char *) value[i]);
+ data += sizeof(char);
+ break;
+ case sizeof(int16):
+ data = (char *) SHORTALIGN(data);
+ * (short *) data = (att[i]->attbyval ?
+ DatumGetInt16(value[i]) :
+ *((short *) value[i]));
+ data += sizeof(short);
+ break;
+ case sizeof(int32):
+ data = (char *) INTALIGN(data);
+ * (int32 *) data = (att[i]->attbyval ?
+ DatumGetInt32(value[i]) :
+ *((int32 *) value[i]));
+ data += sizeof(int32);
+ break;
+ default:
+ /* fixed-size pass-by-reference types (see ComputeDataSize) */
+ if (att[i]->attlen < sizeof(int32))
+ elog(WARN, "DataFill: attribute %d has len %d",
+ i, att[i]->attlen);
+ if (att[i]->attalign == 'd') {
+ data = (char *) DOUBLEALIGN(data);
+ memmove(data, DatumGetPointer(value[i]),
+ att[i]->attlen);
+ data += att[i]->attlen;
+ } else {
+ data = (char *) LONGALIGN(data);
+ memmove(data, DatumGetPointer(value[i]),
+ att[i]->attlen);
+ data += att[i]->attlen;
+ }
+
+ }
+ }
+}
+
+/* ----------------------------------------------------------------
+ * heap tuple interface
+ * ----------------------------------------------------------------
+ */
+
+/* ----------------
+ * heap_attisnull - returns 1 iff tuple attribute is not present
+ *
+ * An attnum beyond t_natts is reported as null. System attributes
+ * (negative attnums) are never null. attnum 0 and unrecognized
+ * negative values raise an error via elog(WARN).
+ * ----------------
+ */
+int
+heap_attisnull(HeapTuple tup, int attnum)
+{
+ if (attnum > (int)tup->t_natts)
+ return (1);
+
+ /* fast path: tuple has no nulls at all */
+ if (HeapTupleNoNulls(tup)) return(0);
+
+ if (attnum > 0) {
+ /* user attribute: consult the null bitmap (offset is attnum-1) */
+ return(att_isnull(attnum - 1, tup->t_bits));
+ } else
+ switch (attnum) {
+ case SelfItemPointerAttributeNumber:
+ case ObjectIdAttributeNumber:
+ case MinTransactionIdAttributeNumber:
+ case MinCommandIdAttributeNumber:
+ case MaxTransactionIdAttributeNumber:
+ case MaxCommandIdAttributeNumber:
+ case ChainItemPointerAttributeNumber:
+ case AnchorItemPointerAttributeNumber:
+ case MinAbsoluteTimeAttributeNumber:
+ case MaxAbsoluteTimeAttributeNumber:
+ case VersionTypeAttributeNumber:
+ break;
+
+ case 0:
+ elog(WARN, "heap_attisnull: zero attnum disallowed");
+
+ default:
+ elog(WARN, "heap_attisnull: undefined negative attnum");
+ }
+
+ return (0);
+}
+
+/* ----------------------------------------------------------------
+ * system attribute heap tuple support
+ * ----------------------------------------------------------------
+ */
+
+/* ----------------
+ * heap_sysattrlen
+ *
+ * This routine returns the length of a system attribute.
+ *
+ * 'f' is a NULL HeapTupleData pointer used purely as a sizeof
+ * template for the header fields; it is never dereferenced.
+ * ----------------
+ */
+int
+heap_sysattrlen(AttrNumber attno)
+{
+ HeapTupleData *f = NULL;
+ int len;
+
+ switch (attno) {
+ case SelfItemPointerAttributeNumber:
+ len = sizeof f->t_ctid;
+ break;
+ case ObjectIdAttributeNumber:
+ len = sizeof f->t_oid;
+ break;
+ case MinTransactionIdAttributeNumber:
+ len = sizeof f->t_xmin;
+ break;
+ case MinCommandIdAttributeNumber:
+ len = sizeof f->t_cmin;
+ break;
+ case MaxTransactionIdAttributeNumber:
+ len = sizeof f->t_xmax;
+ break;
+ case MaxCommandIdAttributeNumber:
+ len = sizeof f->t_cmax;
+ break;
+ case ChainItemPointerAttributeNumber:
+ len = sizeof f->t_chain;
+ break;
+ case AnchorItemPointerAttributeNumber:
+ /* NOTE(review): this path relies on elog(WARN) not returning;
+ * if it did, 'len' would be used uninitialized below. Confirm
+ * elog's control-flow contract. */
+ elog(WARN, "heap_sysattrlen: field t_anchor does not exist!");
+ break;
+ case MinAbsoluteTimeAttributeNumber:
+ len = sizeof f->t_tmin;
+ break;
+ case MaxAbsoluteTimeAttributeNumber:
+ len = sizeof f->t_tmax;
+ break;
+ case VersionTypeAttributeNumber:
+ len = sizeof f->t_vtype;
+ break;
+ default:
+ elog(WARN, "sysattrlen: System attribute number %d unknown.",
+ attno);
+ len = 0;
+ break;
+ }
+ return (len);
+}
+
+/* ----------------
+ * heap_sysattrbyval
+ *
+ * This routine returns the "by-value" property of a system attribute.
+ * ----------------
+ */
+bool
+heap_sysattrbyval(AttrNumber attno)
+{
+ bool result;
+
+ switch (attno) {
+ /* the item-pointer attributes are passed by reference */
+ case SelfItemPointerAttributeNumber:
+ case ChainItemPointerAttributeNumber:
+ case AnchorItemPointerAttributeNumber:
+ result = false;
+ break;
+ /* everything else fits in a Datum and is passed by value */
+ case ObjectIdAttributeNumber:
+ case MinTransactionIdAttributeNumber:
+ case MinCommandIdAttributeNumber:
+ case MaxTransactionIdAttributeNumber:
+ case MaxCommandIdAttributeNumber:
+ case MinAbsoluteTimeAttributeNumber:
+ case MaxAbsoluteTimeAttributeNumber:
+ case VersionTypeAttributeNumber:
+ result = true;
+ break;
+ default:
+ /* unknown attribute: report it, defaulting to by-value */
+ result = true;
+ elog(WARN, "sysattrbyval: System attribute number %d unknown.",
+ attno);
+ break;
+ }
+
+ return result;
+}
+
+/* ----------------
+ * heap_getsysattr
+ *
+ * Fetch a system attribute of 'tup'. By-reference attributes return
+ * a pointer into the tuple; by-value attributes return the value
+ * itself smuggled through the char* return type via a (long) cast.
+ * 'b' (the buffer) is accepted but not used here.
+ * ----------------
+ */
+char *
+heap_getsysattr(HeapTuple tup, Buffer b, int attnum)
+{
+ switch (attnum) {
+ case SelfItemPointerAttributeNumber:
+ return ((char *)&tup->t_ctid);
+ case ObjectIdAttributeNumber:
+ return ((char *) (long) tup->t_oid);
+ case MinTransactionIdAttributeNumber:
+ return ((char *) (long) tup->t_xmin);
+ case MinCommandIdAttributeNumber:
+ return ((char *) (long) tup->t_cmin);
+ case MaxTransactionIdAttributeNumber:
+ return ((char *) (long) tup->t_xmax);
+ case MaxCommandIdAttributeNumber:
+ return ((char *) (long) tup->t_cmax);
+ case ChainItemPointerAttributeNumber:
+ return ((char *) &tup->t_chain);
+ case AnchorItemPointerAttributeNumber:
+ /* NOTE(review): assumes elog(WARN) does not return here --
+ * otherwise this falls through to the NULL return. Confirm. */
+ elog(WARN, "heap_getsysattr: t_anchor does not exist!");
+ break;
+
+ /*
+ * For tmin and tmax, we need to do some extra work. These don't
+ * get filled in until the vacuum cleaner runs (or we manage to flush
+ * a page after setting the value correctly below). If the vacuum
+ * cleaner hasn't run yet, then the times stored in the tuple are
+ * wrong, and we need to look up the commit time of the transaction.
+ * We cache this value in the tuple to avoid doing the work more than
+ * once.
+ */
+
+ case MinAbsoluteTimeAttributeNumber:
+ if (!AbsoluteTimeIsBackwardCompatiblyValid(tup->t_tmin) &&
+ TransactionIdDidCommit(tup->t_xmin))
+ tup->t_tmin = TransactionIdGetCommitTime(tup->t_xmin);
+ return ((char *) (long) tup->t_tmin);
+ case MaxAbsoluteTimeAttributeNumber:
+ if (!AbsoluteTimeIsBackwardCompatiblyReal(tup->t_tmax)) {
+ if (TransactionIdDidCommit(tup->t_xmax))
+ tup->t_tmax = TransactionIdGetCommitTime(tup->t_xmax);
+ else
+ tup->t_tmax = CURRENT_ABSTIME;
+ }
+ return ((char *) (long) tup->t_tmax);
+ case VersionTypeAttributeNumber:
+ return ((char *) (long) tup->t_vtype);
+ default:
+ elog(WARN, "heap_getsysattr: undefined attnum %d", attnum);
+ }
+ return(NULL);
+}
+
+/* ----------------
+ * fastgetattr
+ *
+ * This is a newer version of fastgetattr which attempts to be
+ * faster by caching attribute offsets in the attribute descriptor.
+ *
+ * an alternate way to speed things up would be to cache offsets
+ * with the tuple, but that seems more difficult unless you take
+ * the storage hit of actually putting those offsets into the
+ * tuple you send to disk. Yuck.
+ *
+ * This scheme will be slightly slower than that, but should
 * perform well for queries which hit large #'s of tuples. After
+ * you cache the offsets once, examining all the other tuples using
+ * the same attribute descriptor will go much quicker. -cim 5/4/91
+ * ----------------
+ */
char *
fastgetattr(HeapTuple tup,
	    int attnum,
	    TupleDesc tupleDesc,
	    bool *isnull)
{
    /*
     * Extract attribute 'attnum' (1-based, user attributes only) from
     * heap tuple 'tup', using/maintaining the attcacheoff offset cache
     * kept in the attribute descriptors of 'tupleDesc'.  Sets *isnull
     * and returns NULL when the attribute is null; otherwise returns a
     * pointer/value per fetchatt's conventions.
     */
    char *tp;			/* ptr to att in tuple */
    bits8 *bp;			/* ptr to null bitmap in tuple */
    int slow;			/* do we have to walk nulls? */
    AttributeTupleForm *att = tupleDesc->attrs;

    /* ----------------
     * sanity checks
     * ----------------
     */

    Assert(PointerIsValid(isnull));
    Assert(attnum > 0);

    /* ----------------
     * Three cases:
     *
     * 1: No nulls and no variable length attributes.
     * 2: Has a null or a varlena AFTER att.
     * 3: Has nulls or varlenas BEFORE att.
     * ----------------
     */

    *isnull = false;

    if (HeapTupleNoNulls(tup)) {
	attnum--;		/* switch to 0-based indexing */
	if (att[attnum]->attcacheoff > 0) {
	    /* cached offset available: direct fetch */
	    return (char *)
		fetchatt( &(att[attnum]),
			  (char *)tup + tup->t_hoff + att[attnum]->attcacheoff);
	} else if (attnum == 0) {
	    /*
	     * first attribute is always at position zero
	     */
	    return((char *) fetchatt(&(att[0]), (char *) tup + tup->t_hoff));
	}

	tp = (char *) tup + tup->t_hoff;

	slow = 0;
    } else {
	/*
	 * there's a null somewhere in the tuple
	 */

	bp = tup->t_bits;
	tp = (char *) tup + tup->t_hoff;
	slow = 0;
	attnum--;		/* switch to 0-based indexing */

	/* ----------------
	 * check to see if desired att is null
	 * ----------------
	 */

	if (att_isnull(attnum, bp)) {
	    *isnull = true;
	    return NULL;
	}

	/* ----------------
	 * Now check to see if any preceding bits are null...
	 * a null before the target forces the careful walk below.
	 * ----------------
	 */

	{
	    register int i = 0;	/* current offset in bp */

	    for (i = 0; i < attnum && !slow; i++) {
		if (att_isnull(i, bp)) slow = 1;
	    }
	}
    }

    /*
     * now check for any non-fixed length attrs before our attribute
     * (a varlena before the target also invalidates simple offsets)
     */
    if (!slow) {
	if (att[attnum]->attcacheoff > 0) {
	    return (char *)
		fetchatt(&(att[attnum]),
			 tp + att[attnum]->attcacheoff);
	} else if (attnum == 0) {
	    return (char *)
		fetchatt(&(att[0]), (char *) tup + tup->t_hoff);
	} else if (!HeapTupleAllFixed(tup)) {
	    register int j = 0;

	    /* attlen < 1 marks a variable-length attribute */
	    for (j = 0; j < attnum && !slow; j++)
		if (att[j]->attlen < 1) slow = 1;
	}
    }

    /*
     * if slow is zero, and we got here, we know that we have a tuple with
     * no nulls.  We also have to initialize the remainder of
     * the attribute cached offset values.
     */
    if (!slow) {
	register int j = 1;
	register long off;

	/*
	 * need to set cache for some atts
	 */

	att[0]->attcacheoff = 0;

	/* skip attributes whose offsets are already cached */
	while (att[j]->attcacheoff > 0) j++;

	off = att[j-1]->attcacheoff + att[j-1]->attlen;

	/* compute and cache offsets up to and including the target */
	for (; j < attnum + 1; j++) {
	    /* align 'off' for attribute j's storage requirements */
	    switch(att[j]->attlen) {
	    case -1:
		off = (att[j]->attalign=='d') ?
		    DOUBLEALIGN(off) : INTALIGN(off);
		break;
	    case sizeof(char):
		break;
	    case sizeof(short):
		off = SHORTALIGN(off);
		break;
	    case sizeof(int32):
		off = INTALIGN(off);
		break;
	    default:
		if (att[j]->attlen < sizeof(int32)) {
		    elog(WARN,
			 "fastgetattr: attribute %d has len %d",
			 j, att[j]->attlen);
		}
		if (att[j]->attalign == 'd')
		    off = DOUBLEALIGN(off);
		else
		    off = LONGALIGN(off);
		break;
	    }

	    att[j]->attcacheoff = off;
	    off += att[j]->attlen;
	}

	return
	    (char *)fetchatt(&(att[attnum]), tp + att[attnum]->attcacheoff);
    } else {
	register bool usecache = true;
	register int off = 0;
	register int i;

	/*
	 * Now we know that we have to walk the tuple CAREFULLY.
	 *
	 * Note - This loop is a little tricky.  On iteration i we
	 * first set the offset for attribute i and figure out how much
	 * the offset should be incremented.  Finally, we need to align the
	 * offset based on the size of attribute i+1 (for which the offset
	 * has been computed). -mer 12 Dec 1991
	 */

	for (i = 0; i < attnum; i++) {
	    if (!HeapTupleNoNulls(tup)) {
		/* null attrs occupy no space; offsets after one are uncacheable */
		if (att_isnull(i, bp)) {
		    usecache = false;
		    continue;
		}
	    }
	    /* first: align 'off' for attribute i */
	    switch (att[i]->attlen) {
	    case -1:
		off = (att[i]->attalign=='d') ?
		    DOUBLEALIGN(off) : INTALIGN(off);
		break;
	    case sizeof(char):
		break;
	    case sizeof(short):
		off = SHORTALIGN(off);
		break;
	    case sizeof(int32):
		off = INTALIGN(off);
		break;
	    default:
		if (att[i]->attlen < sizeof(int32))
		    elog(WARN,
			 "fastgetattr2: attribute %d has len %d",
			 i, att[i]->attlen);
		if (att[i]->attalign == 'd')
		    off = DOUBLEALIGN(off);
		else
		    off = LONGALIGN(off);
		break;
	    }
	    /* prefer a cached offset; cache ours if still trustworthy */
	    if (usecache && att[i]->attcacheoff > 0) {
		off = att[i]->attcacheoff;
		if (att[i]->attlen == -1) {
		    usecache = false;
		}
	    } else {
		if (usecache) att[i]->attcacheoff = off;
	    }

	    /* second: advance 'off' past attribute i */
	    switch(att[i]->attlen) {
	    case sizeof(char):
		off++;
		break;
	    case sizeof(int16):
		off += sizeof(int16);
		break;
	    case sizeof(int32):
		off += sizeof(int32);
		break;
	    case -1:
		/* varlena: later offsets depend on this value's size */
		usecache = false;
		off += VARSIZE(tp + off);
		break;
	    default:
		off += att[i]->attlen;
		break;
	    }
	}
	/* finally: align 'off' for the target attribute itself */
	switch (att[attnum]->attlen) {
	case -1:
	    off = (att[attnum]->attalign=='d')?
		DOUBLEALIGN(off) : INTALIGN(off);
	    break;
	case sizeof(char):
	    break;
	case sizeof(short):
	    off = SHORTALIGN(off);
	    break;
	case sizeof(int32):
	    off = INTALIGN(off);
	    break;
	default:
	    if (att[attnum]->attlen < sizeof(int32))
		elog(WARN, "fastgetattr3: attribute %d has len %d",
		     attnum, att[attnum]->attlen);
	    if (att[attnum]->attalign == 'd')
		off = DOUBLEALIGN(off);
	    else
		off = LONGALIGN(off);
	    break;
	}
	return((char *) fetchatt(&(att[attnum]), tp + off));
    }
}
+
+/* ----------------
+ * heap_getattr
+ *
+ * returns an attribute from a heap tuple. uses
+ * ----------------
+ */
+char *
+heap_getattr(HeapTuple tup,
+ Buffer b,
+ int attnum,
+ TupleDesc tupleDesc,
+ bool *isnull)
+{
+ bool localIsNull;
+
+ /* ----------------
+ * sanity checks
+ * ----------------
+ */
+ Assert(tup != NULL);
+
+ if (! PointerIsValid(isnull))
+ isnull = &localIsNull;
+
+ if (attnum > (int) tup->t_natts) {
+ *isnull = true;
+ return ((char *) NULL);
+ }
+
+ /* ----------------
+ * take care of user defined attributes
+ * ----------------
+ */
+ if (attnum > 0) {
+ char *datum;
+ datum = fastgetattr(tup, attnum, tupleDesc, isnull);
+
+ return (datum);
+ }
+
+ /* ----------------
+ * take care of system attributes
+ * ----------------
+ */
+ *isnull = false;
+ return
+ heap_getsysattr(tup, b, attnum);
+}
+
+/* ----------------
+ * heap_copytuple
+ *
+ * returns a copy of an entire tuple
+ * ----------------
+ */
+HeapTuple
+heap_copytuple(HeapTuple tuple)
+{
+ HeapTuple newTuple;
+
+ if (! HeapTupleIsValid(tuple))
+ return (NULL);
+
+ /* XXX For now, just prevent an undetectable executor related error */
+ if (tuple->t_len > MAXTUPLEN) {
+ elog(WARN, "palloctup: cannot handle length %d tuples",
+ tuple->t_len);
+ }
+
+ newTuple = (HeapTuple) palloc(tuple->t_len);
+ memmove((char *) newTuple, (char *) tuple, (int) tuple->t_len);
+ return(newTuple);
+}
+
+/* ----------------
+ * heap_deformtuple
+ *
+ * the inverse of heap_formtuple (see below)
+ * ----------------
+ */
+void
+heap_deformtuple(HeapTuple tuple,
+ TupleDesc tdesc,
+ Datum values[],
+ char nulls[])
+{
+ int i;
+ int natts;
+
+ Assert(HeapTupleIsValid(tuple));
+
+ natts = tuple->t_natts;
+ for (i = 0; i<natts; i++) {
+ bool isnull;
+
+ values[i] = (Datum)heap_getattr(tuple,
+ InvalidBuffer,
+ i+1,
+ tdesc,
+ &isnull);
+ if (isnull)
+ nulls[i] = 'n';
+ else
+ nulls[i] = ' ';
+ }
+}
+
+/* ----------------
+ * heap_formtuple
+ *
+ * constructs a tuple from the given value[] and null[] arrays
+ *
+ * old comments
+ * Handles alignment by aligning 2 byte attributes on short boundries
+ * and 3 or 4 byte attributes on long word boundries on a vax; and
+ * aligning non-byte attributes on short boundries on a sun. Does
+ * not properly align fixed length arrays of 1 or 2 byte types (yet).
+ *
+ * Null attributes are indicated by a 'n' in the appropriate byte
+ * of the null[]. Non-null attributes are indicated by a ' ' (space).
+ *
+ * Fix me. (Figure that must keep context if debug--allow give oid.)
+ * Assumes in order.
+ * ----------------
+ */
+HeapTuple
+heap_formtuple(TupleDesc tupleDescriptor,
+ Datum value[],
+ char nulls[])
+{
+ char *tp; /* tuple pointer */
+ HeapTuple tuple; /* return tuple */
+ int bitmaplen;
+ long len;
+ int hoff;
+ bool hasnull = false;
+ int i;
+ int numberOfAttributes = tupleDescriptor->natts;
+
+ len = sizeof *tuple - sizeof tuple->t_bits;
+
+ for (i = 0; i < numberOfAttributes && !hasnull; i++) {
+ if (nulls[i] != ' ') hasnull = true;
+ }
+
+ if (numberOfAttributes > MaxHeapAttributeNumber)
+ elog(WARN, "heap_formtuple: numberOfAttributes of %d > %d",
+ numberOfAttributes, MaxHeapAttributeNumber);
+
+ if (hasnull) {
+ bitmaplen = BITMAPLEN(numberOfAttributes);
+ len += bitmaplen;
+ }
+
+ hoff = len = DOUBLEALIGN(len); /* be conservative here */
+
+ len += ComputeDataSize(tupleDescriptor, value, nulls);
+
+ tp = (char *) palloc(len);
+ tuple = (HeapTuple) tp;
+
+ memset(tp, 0, (int)len);
+
+ tuple->t_len = len;
+ tuple->t_natts = numberOfAttributes;
+ tuple->t_hoff = hoff;
+ tuple->t_tmin = INVALID_ABSTIME;
+ tuple->t_tmax = CURRENT_ABSTIME;
+
+ DataFill((char *)tuple + tuple->t_hoff,
+ tupleDescriptor,
+ value,
+ nulls,
+ &tuple->t_infomask,
+ (hasnull ? tuple->t_bits : NULL));
+
+ return (tuple);
+}
+
+/* ----------------
+ * heap_modifytuple
+ *
+ * forms a new tuple from an old tuple and a set of replacement values.
+ * ----------------
+ */
HeapTuple
heap_modifytuple(HeapTuple tuple,
		 Buffer buffer,
		 Relation relation,
		 Datum replValue[],
		 char replNull[],
		 char repl[])
{
    /*
     * Build a new tuple from 'tuple', replacing the attributes whose
     * repl[] byte is 'r' with replValue[]/replNull[]; attributes marked
     * ' ' are copied from the old tuple.  Any other repl[] byte raises
     * elog(WARN).  Either 'buffer' or 'relation' must be valid; when a
     * buffer is given, the relation is derived from it.
     */
    int attoff;
    int numberOfAttributes;
    Datum *value;
    char *nulls;
    bool isNull;
    HeapTuple newTuple;
    int madecopy;		/* did we heap_copytuple the input? */
    uint8 infomask;

    /* ----------------
     * sanity checks
     * (note: HeapTupleIsValid is asserted twice -- harmless redundancy)
     * ----------------
     */
    Assert(HeapTupleIsValid(tuple));
    Assert(BufferIsValid(buffer) || RelationIsValid(relation));
    Assert(HeapTupleIsValid(tuple));
    Assert(PointerIsValid(replValue));
    Assert(PointerIsValid(replNull));
    Assert(PointerIsValid(repl));

    /* ----------------
     * if we're pointing to a disk page, then first
     * make a copy of our tuple so that all the attributes
     * are available.  XXX this is inefficient -cim
     * ----------------
     */
    madecopy = 0;
    if (BufferIsValid(buffer) == true) {
	relation = (Relation) BufferGetRelation(buffer);
	tuple = heap_copytuple(tuple);
	madecopy = 1;
    }

    numberOfAttributes = RelationGetRelationTupleForm(relation)->relnatts;

    /* ----------------
     * allocate and fill value[] and nulls[] arrays from either
     * the tuple or the repl information, as appropriate.
     * ----------------
     */
    value = (Datum *) palloc(numberOfAttributes * sizeof *value);
    nulls = (char *) palloc(numberOfAttributes * sizeof *nulls);

    for (attoff = 0;
	 attoff < numberOfAttributes;
	 attoff += 1) {

	if (repl[attoff] == ' ') {
	    /* keep the old attribute value */
	    char *attr;

	    attr =
		heap_getattr(tuple,
			     InvalidBuffer,
			     AttrOffsetGetAttrNumber(attoff),
			     RelationGetTupleDescriptor(relation),
			     &isNull) ;
	    value[attoff] = PointerGetDatum(attr);
	    nulls[attoff] = (isNull) ? 'n' : ' ';

	} else if (repl[attoff] != 'r') {
	    /* anything other than ' ' or 'r' is a caller error */
	    elog(WARN, "heap_modifytuple: repl is \\%3d", repl[attoff]);

	} else {		/* == 'r' */
	    value[attoff] = replValue[attoff];
	    nulls[attoff] = replNull[attoff];
	}
    }

    /* ----------------
     * create a new tuple from the values[] and nulls[] arrays
     * ----------------
     */
    newTuple = heap_formtuple(RelationGetTupleDescriptor(relation),
			      value,
			      nulls);

    /* ----------------
     * copy the header except for t_len, t_natts, t_hoff, t_bits, t_infomask:
     * the memmove spans the fields between t_ctid and t_hoff in the header,
     * and t_infomask is saved/restored around it.
     * ----------------
     */
    infomask = newTuple->t_infomask;
    memmove((char *) &newTuple->t_ctid,	/*XXX*/
	    (char *) &tuple->t_ctid,
	    ((char *) &tuple->t_hoff - (char *) &tuple->t_ctid)); /*XXX*/
    newTuple->t_infomask = infomask;
    newTuple->t_natts = numberOfAttributes; /* fix t_natts just in case */

    /* ----------------
     * if we made a copy of the tuple, then free it.
     * ----------------
     */
    if (madecopy)
	pfree(tuple);

    return
	newTuple;
}
+
+/* ----------------------------------------------------------------
+ * other misc functions
+ * ----------------------------------------------------------------
+ */
+
HeapTuple
heap_addheader(uint32 natts,	/* max domain index */
	       int structlen,	/* its length */
	       char *structure)	/* pointer to the struct */
{
    /*
     * Wrap a raw C struct in a minimal heap tuple header: allocate a
     * tuple, zero it, fill in the header fields, and copy 'structlen'
     * bytes of 'structure' after the header.  Used for bootstrapping
     * catalog tuples from fixed-layout structs.
     */
    register char *tp;		/* tuple data pointer */
    HeapTuple tup;
    long len;
    int hoff;

    AssertArg(natts > 0);

    /* header size, excluding the (absent) null bitmap */
    len = sizeof (HeapTupleData) - sizeof (tup->t_bits);

    hoff = len = DOUBLEALIGN(len);	/* be conservative */
    len += structlen;
    tp = (char *) palloc(len);
    tup = (HeapTuple) tp;
    memset((char*)tup, 0, len);

    /*
     * NOTE(review): the (short) cast narrows len; presumably safe because
     * header+structlen stays small here, but confirm t_len's real width.
     */
    tup->t_len = (short) len;	/* XXX */
    /* advance tp past the header while recording the data offset */
    tp += tup->t_hoff = hoff;
    tup->t_natts = natts;
    tup->t_infomask = 0;

    memmove(tp, structure, structlen);

    return (tup);
}
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * heapvalid.c--
+ * heap tuple qualification validity checking code
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $Header$
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+
+#include "access/htup.h"
+#include "access/skey.h"
+#include "access/heapam.h"
+#include "utils/tqual.h"
+#include "access/valid.h" /* where the declarations go */
+#include "access/xact.h"
+
+#include "storage/buf.h"
+#include "storage/bufmgr.h"
+#include "storage/bufpage.h"
+#include "storage/itemid.h"
+#include "fmgr.h"
+#include "utils/elog.h"
+#include "utils/rel.h"
+
+/* ----------------
+ * heap_keytest
+ *
+ * Test a heap tuple with respect to a scan key.
+ * ----------------
+ */
+bool
+heap_keytest(HeapTuple t,
+ TupleDesc tupdesc,
+ int nkeys,
+ ScanKey keys)
+{
+ bool isnull;
+ Datum atp;
+ int test;
+
+ for (; nkeys--; keys++) {
+ atp = (Datum)heap_getattr(t, InvalidBuffer,
+ keys->sk_attno,
+ tupdesc,
+ &isnull);
+
+ if (isnull)
+ /* XXX eventually should check if SK_ISNULL */
+ return false;
+
+ if (keys->sk_flags & SK_COMMUTE)
+ test = (long) FMGR_PTR2(keys->sk_func, keys->sk_procedure,
+ keys->sk_argument, atp);
+ else
+ test = (long) FMGR_PTR2(keys->sk_func, keys->sk_procedure,
+ atp, keys->sk_argument);
+
+ if (!test == !(keys->sk_flags & SK_NEGATE))
+ return false;
+ }
+
+ return true;
+}
+
+/* ----------------
+ * heap_tuple_satisfies
+ *
+ * Returns a valid HeapTuple if it satisfies the timequal and keytest.
+ * Returns NULL otherwise. Used to be heap_satisifies (sic) which
+ * returned a boolean. It now returns a tuple so that we can avoid doing two
+ * PageGetItem's per tuple.
+ *
+ * Complete check of validity including LP_CTUP and keytest.
+ * This should perhaps be combined with valid somehow in the
+ * future. (Also, additional rule tests/time range tests.)
+ *
+ * on 8/21/92 mao says: i rearranged the tests here to do keytest before
+ * SatisfiesTimeQual. profiling indicated that even for vacuumed relations,
+ * time qual checking was more expensive than key testing. time qual is
+ * least likely to fail, too. we should really add the time qual test to
+ * the restriction and optimize it in the normal way. this has interactions
+ * with joey's expensive function work.
+ * ----------------
+ */
+HeapTuple
+heap_tuple_satisfies(ItemId itemId,
+ Relation relation,
+ PageHeader disk_page,
+ TimeQual qual,
+ int nKeys,
+ ScanKey key)
+{
+ HeapTuple tuple;
+ bool res;
+
+ if (! ItemIdIsUsed(itemId))
+ return NULL;
+
+ tuple = (HeapTuple) PageGetItem((Page) disk_page, itemId);
+
+ if (key != NULL)
+ res = heap_keytest(tuple, RelationGetTupleDescriptor(relation),
+ nKeys, key);
+ else
+ res = TRUE;
+
+ if (res && (relation->rd_rel->relkind == RELKIND_UNCATALOGED
+ || HeapTupleSatisfiesTimeQual(tuple,qual)))
+ return tuple;
+
+ return (HeapTuple) NULL;
+}
+
+/*
+ * TupleUpdatedByCurXactAndCmd() -- Returns true if this tuple has
+ * already been updated once by the current transaction/command
+ * pair.
+ */
+bool
+TupleUpdatedByCurXactAndCmd(HeapTuple t)
+{
+ if (TransactionIdEquals(t->t_xmax,
+ GetCurrentTransactionId()) &&
+ t->t_cmax == GetCurrentCommandId())
+ return true;
+
+ return false;
+}
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * indextuple.c--
+ * This file contains index tuple accessor and mutator routines,
+ * as well as a few various tuple utilities.
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $Header$
+ *
+ *-------------------------------------------------------------------------
+ */
+#include <string.h>
+
+#include "c.h"
+#include "access/ibit.h"
+#include "access/itup.h" /* where the declarations go */
+#include "access/heapam.h"
+#include "access/genam.h"
+#include "access/tupdesc.h"
+#include "access/tupmacs.h"
+
+#include "storage/itemptr.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+
+static Size IndexInfoFindDataOffset(unsigned short t_info);
+
+/* ----------------------------------------------------------------
+ * index_ tuple interface routines
+ * ----------------------------------------------------------------
+ */
+
+/* ----------------
+ * index_formtuple
+ * ----------------
+ */
+IndexTuple
+index_formtuple(TupleDesc tupleDescriptor,
+ Datum value[],
+ char null[])
+{
+ register char *tp; /* tuple pointer */
+ IndexTuple tuple; /* return tuple */
+ Size size, hoff;
+ int i;
+ unsigned short infomask = 0;
+ bool hasnull = false;
+ char tupmask = 0;
+ int numberOfAttributes = tupleDescriptor->natts;
+
+ if (numberOfAttributes > MaxIndexAttributeNumber)
+ elog(WARN, "index_formtuple: numberOfAttributes of %d > %d",
+ numberOfAttributes, MaxIndexAttributeNumber);
+
+
+ for (i = 0; i < numberOfAttributes && !hasnull; i++) {
+ if (null[i] != ' ') hasnull = true;
+ }
+
+ if (hasnull) infomask |= INDEX_NULL_MASK;
+
+ hoff = IndexInfoFindDataOffset(infomask);
+ size = hoff
+ + ComputeDataSize(tupleDescriptor,
+ value, null);
+ size = DOUBLEALIGN(size); /* be conservative */
+
+ tp = (char *) palloc(size);
+ tuple = (IndexTuple) tp;
+ memset(tp,0,(int)size);
+
+ DataFill((char *)tp + hoff,
+ tupleDescriptor,
+ value,
+ null,
+ &tupmask,
+ (hasnull ? (bits8*)tp + sizeof(*tuple) : NULL));
+
+ /*
+ * We do this because DataFill wants to initialize a "tupmask" which
+ * is used for HeapTuples, but we want an indextuple infomask. The only
+ * "relevent" info is the "has variable attributes" field, which is in
+ * mask position 0x02. We have already set the null mask above.
+ */
+
+ if (tupmask & 0x02) infomask |= INDEX_VAR_MASK;
+
+ /*
+ * Here we make sure that we can actually hold the size. We also want
+ * to make sure that size is not aligned oddly. This actually is a
+ * rather odd way to make sure the size is not too large overall.
+ */
+
+ if (size & 0xE000)
+ elog(WARN, "index_formtuple: data takes %d bytes: too big", size);
+
+
+ infomask |= size;
+
+ /* ----------------
+ * initialize metadata
+ * ----------------
+ */
+ tuple->t_info = infomask;
+ return (tuple);
+}
+
+/* ----------------
+ * fastgetiattr
+ *
+ * This is a newer version of fastgetiattr which attempts to be
+ * faster by caching attribute offsets in the attribute descriptor.
+ *
+ * an alternate way to speed things up would be to cache offsets
+ * with the tuple, but that seems more difficult unless you take
+ * the storage hit of actually putting those offsets into the
+ * tuple you send to disk. Yuck.
+ *
+ * This scheme will be slightly slower than that, but should
 * perform well for queries which hit large #'s of tuples. After
+ * you cache the offsets once, examining all the other tuples using
+ * the same attribute descriptor will go much quicker. -cim 5/4/91
+ * ----------------
+ */
+char *
+fastgetiattr(IndexTuple tup,
+ int attnum,
+ TupleDesc tupleDesc,
+ bool *isnull)
+{
+ register char *tp; /* ptr to att in tuple */
+ register char *bp; /* ptr to att in tuple */
+ int slow; /* do we have to walk nulls? */
+ register int data_off; /* tuple data offset */
+
+ /* ----------------
+ * sanity checks
+ * ----------------
+ */
+
+ Assert(PointerIsValid(isnull));
+ Assert(attnum > 0);
+
+ /* ----------------
+ * Three cases:
+ *
+ * 1: No nulls and no variable length attributes.
+ * 2: Has a null or a varlena AFTER att.
+ * 3: Has nulls or varlenas BEFORE att.
+ * ----------------
+ */
+
+ *isnull = false;
+ data_off = IndexTupleHasMinHeader(tup) ? sizeof *tup :
+ IndexInfoFindDataOffset(tup->t_info);
+
+ if (IndexTupleNoNulls(tup)) {
+
+ /* first attribute is always at position zero */
+
+ if (attnum == 1) {
+ return(fetchatt(&(tupleDesc->attrs[0]), (char *) tup + data_off));
+ }
+ attnum--;
+
+ if (tupleDesc->attrs[attnum]->attcacheoff > 0) {
+ return(fetchatt(&(tupleDesc->attrs[attnum]),
+ (char *) tup + data_off +
+ tupleDesc->attrs[attnum]->attcacheoff));
+ }
+
+ tp = (char *) tup + data_off;
+
+ slow = 0;
+ }else { /* there's a null somewhere in the tuple */
+
+ bp = (char *) tup + sizeof(*tup); /* "knows" t_bits are here! */
+ slow = 0;
+ /* ----------------
+ * check to see if desired att is null
+ * ----------------
+ */
+
+ attnum--;
+ {
+ if (att_isnull(attnum, bp)) {
+ *isnull = true;
+ return NULL;
+ }
+ }
+ /* ----------------
+ * Now check to see if any preceeding bits are null...
+ * ----------------
+ */
+ {
+ register int i = 0; /* current offset in bp */
+ register int mask; /* bit in byte we're looking at */
+ register char n; /* current byte in bp */
+ register int byte, finalbit;
+
+ byte = attnum >> 3;
+ finalbit = attnum & 0x07;
+
+ for (; i <= byte; i++) {
+ n = bp[i];
+ if (i < byte) {
+ /* check for nulls in any "earlier" bytes */
+ if ((~n) != 0) {
+ slow++;
+ break;
+ }
+ } else {
+ /* check for nulls "before" final bit of last byte*/
+ mask = (finalbit << 1) - 1;
+ if ((~n) & mask)
+ slow++;
+ }
+ }
+ }
+ tp = (char *) tup + data_off;
+ }
+
+ /* now check for any non-fixed length attrs before our attribute */
+
+ if (!slow) {
+ if (tupleDesc->attrs[attnum]->attcacheoff > 0) {
+ return(fetchatt(&(tupleDesc->attrs[attnum]),
+ tp + tupleDesc->attrs[attnum]->attcacheoff));
+ }else if (!IndexTupleAllFixed(tup)) {
+ register int j = 0;
+
+ for (j = 0; j < attnum && !slow; j++)
+ if (tupleDesc->attrs[j]->attlen < 1) slow = 1;
+ }
+ }
+
+ /*
+ * if slow is zero, and we got here, we know that we have a tuple with
+ * no nulls. We also know that we have to initialize the remainder of
+ * the attribute cached offset values.
+ */
+
+ if (!slow) {
+ register int j = 1;
+ register long off;
+
+ /*
+ * need to set cache for some atts
+ */
+
+ tupleDesc->attrs[0]->attcacheoff = 0;
+
+ while (tupleDesc->attrs[j]->attcacheoff > 0) j++;
+
+ off = tupleDesc->attrs[j-1]->attcacheoff +
+ tupleDesc->attrs[j-1]->attlen;
+
+ for (; j < attnum + 1; j++) {
+ /*
+ * Fix me when going to a machine with more than a four-byte
+ * word!
+ */
+
+ switch(tupleDesc->attrs[j]->attlen)
+ {
+ case -1:
+ off = (tupleDesc->attrs[j]->attalign=='d')?
+ DOUBLEALIGN(off):INTALIGN(off);
+ break;
+ case sizeof(char):
+ break;
+ case sizeof(short):
+ off = SHORTALIGN(off);
+ break;
+ case sizeof(int32):
+ off = INTALIGN(off);
+ break;
+ default:
+ if (tupleDesc->attrs[j]->attlen > sizeof(int32))
+ off = (tupleDesc->attrs[j]->attalign=='d')?
+ DOUBLEALIGN(off) : LONGALIGN(off);
+ else
+ elog(WARN, "fastgetiattr: attribute %d has len %d",
+ j, tupleDesc->attrs[j]->attlen);
+ break;
+
+ }
+
+ tupleDesc->attrs[j]->attcacheoff = off;
+ off += tupleDesc->attrs[j]->attlen;
+ }
+
+ return(fetchatt( &(tupleDesc->attrs[attnum]),
+ tp + tupleDesc->attrs[attnum]->attcacheoff));
+ }else {
+ register bool usecache = true;
+ register int off = 0;
+ register int i;
+
+ /*
+ * Now we know that we have to walk the tuple CAREFULLY.
+ */
+
+ for (i = 0; i < attnum; i++) {
+ if (!IndexTupleNoNulls(tup)) {
+ if (att_isnull(i, bp)) {
+ usecache = false;
+ continue;
+ }
+ }
+
+ if (usecache && tupleDesc->attrs[i]->attcacheoff > 0) {
+ off = tupleDesc->attrs[i]->attcacheoff;
+ if (tupleDesc->attrs[i]->attlen == -1)
+ usecache = false;
+ else
+ continue;
+ }
+
+ if (usecache) tupleDesc->attrs[i]->attcacheoff = off;
+ switch(tupleDesc->attrs[i]->attlen)
+ {
+ case sizeof(char):
+ off++;
+ break;
+ case sizeof(short):
+ off = SHORTALIGN(off) + sizeof(short);
+ break;
+ case -1:
+ usecache = false;
+ off = (tupleDesc->attrs[i]->attalign=='d')?
+ DOUBLEALIGN(off):INTALIGN(off);
+ off += VARSIZE(tp + off);
+ break;
+ default:
+ if (tupleDesc->attrs[i]->attlen > sizeof(int32))
+ off = (tupleDesc->attrs[i]->attalign=='d') ?
+ DOUBLEALIGN(off) + tupleDesc->attrs[i]->attlen :
+ LONGALIGN(off) + tupleDesc->attrs[i]->attlen;
+ else
+ elog(WARN, "fastgetiattr2: attribute %d has len %d",
+ i, tupleDesc->attrs[i]->attlen);
+
+ break;
+ }
+ }
+
+ return(fetchatt(&tupleDesc->attrs[attnum], tp + off));
+ }
+}
+
+/* ----------------
+ * index_getattr
+ * ----------------
+ */
+Datum
+index_getattr(IndexTuple tuple,
+ AttrNumber attNum,
+ TupleDesc tupDesc,
+ bool *isNullOutP)
+{
+ Assert (attNum > 0);
+
+ return (Datum)
+ fastgetiattr(tuple, attNum, tupDesc, isNullOutP);
+}
+
+RetrieveIndexResult
+FormRetrieveIndexResult(ItemPointer indexItemPointer,
+ ItemPointer heapItemPointer)
+{
+ RetrieveIndexResult result;
+
+ Assert(ItemPointerIsValid(indexItemPointer));
+ Assert(ItemPointerIsValid(heapItemPointer));
+
+ result = (RetrieveIndexResult) palloc(sizeof *result);
+
+ result->index_iptr = *indexItemPointer;
+ result->heap_iptr = *heapItemPointer;
+
+ return (result);
+}
+
+/*
+ * Takes an infomask as argument (primarily because this needs to be usable
+ * at index_formtuple time so enough space is allocated).
+ *
+ * Change me if adding an attribute to IndexTuples!!!!!!!!!!!
+ */
+static Size
+IndexInfoFindDataOffset(unsigned short t_info)
+{
+ if (!(t_info & INDEX_NULL_MASK))
+ return((Size) sizeof(IndexTupleData));
+ else {
+ Size size = sizeof(IndexTupleData);
+
+ if (t_info & INDEX_NULL_MASK) {
+ size += sizeof(IndexAttributeBitMapData);
+ }
+ return DOUBLEALIGN(size); /* be conservative */
+ }
+}
+
+/*
+ * Copies source into target. If *target == NULL, we palloc space; otherwise
+ * we assume we have space that is already palloc'ed.
+ */
+void
+CopyIndexTuple(IndexTuple source, IndexTuple *target)
+{
+ Size size;
+ IndexTuple ret;
+
+ size = IndexTupleSize(source);
+ if (*target == NULL) {
+ *target = (IndexTuple) palloc(size);
+ }
+
+ ret = *target;
+ memmove((char*)ret, (char*)source, size);
+}
+
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * indexvalid.c--
+ * index tuple qualification validity checking code
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $Header$
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "executor/execdebug.h"
+#include "access/genam.h"
+#include "access/iqual.h" /* where the declarations go */
+#include "access/itup.h"
+#include "access/skey.h"
+
+#include "storage/buf.h"
+#include "storage/bufpage.h"
+#include "storage/itemid.h"
+#include "utils/rel.h"
+
+/* ----------------------------------------------------------------
+ * index scan key qualification code
+ * ----------------------------------------------------------------
+ */
+int NIndexTupleProcessed;
+
+/* ----------------
+ * index_keytest
+ *
+ * old comments
+ * May eventually combine with other tests (like timeranges)?
+ * Should have Buffer buffer; as an argument and pass it to amgetattr.
+ * ----------------
+ */
bool
index_keytest(IndexTuple tuple,
	      TupleDesc tupdesc,
	      int scanKeySize,
	      ScanKey key)
{
    /*
     * Test index tuple 'tuple' against 'scanKeySize' scan keys.  Returns
     * true only when every key's comparison function accepts the fetched
     * attribute (honoring SK_COMMUTE and SK_NEGATE); a null attribute
     * fails immediately.
     */
    bool isNull;
    Datum datum;
    int test;

    IncrIndexProcessed();

    while (scanKeySize > 0) {
	/*
	 * NOTE(review): the attribute number is hard-coded to 1 rather
	 * than taken from key[0].sk_attno (compare heap_keytest) --
	 * presumably because index tuples store only the indexed
	 * attribute; confirm for multi-key indexes.
	 */
	datum = index_getattr(tuple,
			      1,
			      tupdesc,
			      &isNull);

	if (isNull) {
	    /* XXX eventually should check if SK_ISNULL */
	    return (false);
	}

	/* SK_COMMUTE swaps the argument order of the comparison proc */
	if (key[0].sk_flags & SK_COMMUTE) {
	    test = (int) (*(key[0].sk_func))
		(DatumGetPointer(key[0].sk_argument),
		 datum);
	} else {
	    test = (int) (*(key[0].sk_func))
		(datum,
		 DatumGetPointer(key[0].sk_argument));
	}

	/* fail when the result disagrees with the (possibly negated) sense */
	if (!test == !(key[0].sk_flags & SK_NEGATE)) {
	    return (false);
	}

	scanKeySize -= 1;
	key++;
    }

    return (true);
}
+
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * printtup.c--
+ * Routines to print out tuples to the destination (binary or non-binary
+ * portals, frontend/interactive backend, etc.).
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $Header$
+ *
+ *-------------------------------------------------------------------------
+ */
+#include <sys/file.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "postgres.h"
+
+#include "access/heapam.h"
+#include "access/htup.h"
+#include "access/skey.h"
+#include "access/printtup.h"
+#include "access/tupdesc.h"
+#include "storage/buf.h"
+#include "utils/memutils.h"
+#include "utils/palloc.h"
+#include "fmgr.h"
+#include "utils/elog.h"
+
+#include "utils/syscache.h"
+#include "catalog/pg_type.h"
+
+#include "libpq/libpq.h"
+
+/* ----------------------------------------------------------------
+ * printtup / debugtup support
+ * ----------------------------------------------------------------
+ */
+
+/* ----------------
+ * typtoout - used by printtup and debugtup
+ * ----------------
+ */
+Oid
+typtoout(Oid type)
+{
+ HeapTuple typeTuple;
+
+ typeTuple = SearchSysCacheTuple(TYPOID,
+ ObjectIdGetDatum(type),
+ 0, 0, 0);
+
+ if (HeapTupleIsValid(typeTuple))
+ return((Oid)
+ ((TypeTupleForm) GETSTRUCT(typeTuple))->typoutput);
+
+ elog(WARN, "typtoout: Cache lookup of type %d failed", type);
+ return(InvalidOid);
+}
+
+Oid
+gettypelem(Oid type)
+{
+ HeapTuple typeTuple;
+
+ typeTuple = SearchSysCacheTuple(TYPOID,
+ ObjectIdGetDatum(type),
+ 0,0,0);
+
+ if (HeapTupleIsValid(typeTuple))
+ return((Oid)
+ ((TypeTupleForm) GETSTRUCT(typeTuple))->typelem);
+
+ elog(WARN, "typtoout: Cache lookup of type %d failed", type);
+ return(InvalidOid);
+}
+
+/* ----------------
+ * printtup
+ * ----------------
+ */
void
printtup(HeapTuple tuple, TupleDesc typeinfo)
{
    /*
     * Send one tuple to the frontend in the textual "D" wire format:
     * a 'D' message byte, a bitmap with one bit per attribute (1 = not
     * null), then each non-null attribute as a 4-byte length (including
     * the length word itself) followed by its output-function text.
     */
    int i, j, k;
    char *outputstr, *attr;
    bool isnull;
    Oid typoutput;

    /* ----------------
     * tell the frontend to expect new tuple data
     * ----------------
     */
    pq_putnchar("D", 1);

    /* ----------------
     * send a bitmap of which attributes are null.
     * Bits accumulate into 'j' MSB-first; a full byte is flushed every
     * 8 attributes.  Note 'i' is incremented inside the heap_getattr
     * call (++i), so the loop header has no increment.
     * ----------------
     */
    j = 0;
    k = 1 << 7;
    for (i = 0; i < tuple->t_natts; ) {
	attr = heap_getattr(tuple, InvalidBuffer, ++i, typeinfo, &isnull);
	if (!isnull)
	    j |= k;
	k >>= 1;
	if (!(i & 7)) {
	    /* byte boundary: flush and reset the accumulator */
	    pq_putint(j, 1);
	    j = 0;
	    k = 1 << 7;
	}
    }
    /* flush the final partial byte, if any */
    if (i & 7)
	pq_putint(j, 1);

    /* ----------------
     * send the attributes of this tuple
     * ----------------
     */
    for (i = 0; i < tuple->t_natts; ++i) {
	attr = heap_getattr(tuple, InvalidBuffer, i+1, typeinfo, &isnull);
	typoutput = typtoout((Oid) typeinfo->attrs[i]->atttypid);

	/* nulls were already conveyed by the bitmap; send nothing here */
	if (!isnull && OidIsValid(typoutput)) {
	    outputstr = fmgr(typoutput, attr,
			     gettypelem(typeinfo->attrs[i]->atttypid));
	    pq_putint(strlen(outputstr)+4, 4);	/* length includes itself */
	    pq_putnchar(outputstr, strlen(outputstr));
	    pfree(outputstr);
	}
    }
}
+
+/* ----------------
+ * printatt
+ * ----------------
+ */
+/* Print one attribute's metadata (and optionally its value) to stdout,
+ * in the fixed debug format used by showatts/debugtup. `value' may be
+ * NULL, in which case only the metadata is printed. */
+static void
+printatt(unsigned attributeId,
+ AttributeTupleForm attributeP,
+ char *value)
+{
+ printf("\t%2d: %.*s%s%s%s\t(typeid = %u, len = %d, byval = %c)\n",
+ attributeId,
+ NAMEDATALEN, /* attname is a char16 */
+ attributeP->attname.data,
+ value != NULL ? " = \"" : "",
+ value != NULL ? value : "",
+ value != NULL ? "\"" : "",
+ (unsigned int) (attributeP->atttypid),
+ attributeP->attlen,
+ attributeP->attbyval ? 't' : 'f');
+}
+
+/* ----------------
+ * showatts
+ * ----------------
+ */
+/* Dump a relation name followed by one metadata line per attribute
+ * (no values), using printatt for the per-attribute formatting. */
+void
+showatts(char *name, TupleDesc tupleDesc)
+{
+ AttributeTupleForm *attinfo = tupleDesc->attrs;
+ int natts = tupleDesc->natts;
+ int attno = 0;
+
+ puts(name);
+ while (attno < natts) {
+ printatt((unsigned) (attno + 1), attinfo[attno], (char *) NULL);
+ attno++;
+ }
+ printf("\t----\n");
+}
+
+/* ----------------
+ * debugtup
+ * ----------------
+ */
+/* Print every non-null attribute of `tuple' to stdout via its type
+ * output function - the stdout twin of printtup (which writes to the
+ * frontend instead). Attributes whose type has no valid output
+ * function, and null attributes, are silently skipped. */
+void
+debugtup(HeapTuple tuple, TupleDesc typeinfo)
+{
+ register int i;
+ char *attr, *value;
+ bool isnull;
+ Oid typoutput;
+
+ for (i = 0; i < tuple->t_natts; ++i) {
+ attr = heap_getattr(tuple, InvalidBuffer, i+1, typeinfo, &isnull);
+ typoutput = typtoout((Oid) typeinfo->attrs[i]->atttypid);
+
+ if (!isnull && OidIsValid(typoutput)) {
+ value = fmgr(typoutput, attr,
+ gettypelem(typeinfo->attrs[i]->atttypid));
+ printatt((unsigned) i+1, typeinfo->attrs[i], value);
+ pfree(value); /* value was palloc'd by the out func */
+ }
+ }
+ printf("\t----\n");
+}
+
+/*#define IPORTAL_DEBUG*/
+
+/* ----------------
+ * printtup_internal
+ * Protocol expects either T, D, C, E, or N.
+ * We use a different data prefix, e.g. 'B' instead of 'D' to
+ * indicate a tuple in internal (binary) form.
+ *
+ * This is same as printtup, except we don't use the typout func.
+ * ----------------
+ */
+void
+printtup_internal(HeapTuple tuple, TupleDesc typeinfo)
+{
+ int i, j, k;
+ char *attr;
+ bool isnull;
+
+ /* ----------------
+ * tell the frontend to expect new tuple data
+ * ('B' = binary form, vs. 'D' for text form in printtup)
+ * ----------------
+ */
+ pq_putnchar("B", 1);
+
+ /* ----------------
+ * send a bitmap of which attributes are null
+ * (same MSB-first, byte-at-a-time scheme as printtup;
+ * a set bit means "not null")
+ * ----------------
+ */
+ j = 0;
+ k = 1 << 7;
+ for (i = 0; i < tuple->t_natts; ) {
+ attr = heap_getattr(tuple, InvalidBuffer, ++i, typeinfo, &isnull);
+ if (!isnull)
+ j |= k;
+ k >>= 1;
+ if (!(i & 7)) {
+ pq_putint(j, 1);
+ j = 0;
+ k = 1 << 7;
+ }
+ }
+ if (i & 7)
+ pq_putint(j, 1);
+
+ /* ----------------
+ * send the attributes of this tuple, each as a 4-byte length
+ * followed by raw (binary) data - no type output function.
+ * ----------------
+ */
+#ifdef IPORTAL_DEBUG
+ fprintf(stderr, "sending tuple with %d atts\n", tuple->t_natts);
+#endif
+ for (i = 0; i < tuple->t_natts; ++i) {
+ int32 len = typeinfo->attrs[i]->attlen;
+
+ attr = heap_getattr(tuple, InvalidBuffer, i+1, typeinfo, &isnull);
+ if (!isnull) {
+ /* # of bytes, and opaque data */
+ if (len == -1) {
+ /* variable length, assume a varlena structure;
+ * send only the payload, not the length header */
+ len = VARSIZE(attr) - VARHDRSZ;
+
+ pq_putint(len, sizeof(int32));
+ pq_putnchar(VARDATA(attr), len);
+#ifdef IPORTAL_DEBUG
+ {
+ char *d = VARDATA(attr);
+
+ fprintf(stderr, "length %d data %x%x%x%x\n",
+ len, *d, *(d+1), *(d+2), *(d+3));
+ }
+#endif
+ } else {
+ /* fixed size */
+ if (typeinfo->attrs[i]->attbyval) {
+ /* by-value attr: the Datum itself holds the value;
+ * copy it into a correctly-sized integer so we send
+ * exactly `len' bytes */
+ int8 i8;
+ int16 i16;
+ int32 i32;
+
+ pq_putint(len, sizeof(int32));
+ switch (len) {
+ case sizeof(int8):
+ i8 = DatumGetChar(attr);
+ pq_putnchar((char *) &i8, len);
+ break;
+ case sizeof(int16):
+ i16 = DatumGetInt16(attr);
+ pq_putnchar((char *) &i16, len);
+ break;
+ case sizeof(int32):
+ i32 = DatumGetInt32(attr);
+ pq_putnchar((char *) &i32, len);
+ break;
+ }
+#ifdef IPORTAL_DEBUG
+ fprintf(stderr, "byval length %d data %d\n", len, attr);
+#endif
+ } else {
+ /* by-reference fixed-size attr: send the bytes it
+ * points at verbatim */
+ pq_putint(len, sizeof(int32));
+ pq_putnchar(attr, len);
+#ifdef IPORTAL_DEBUG
+ fprintf(stderr, "byref length %d data %x\n", len, attr);
+#endif
+ }
+ }
+ }
+ }
+}
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * scan.c--
+ * scan direction and key code
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $Header$
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+#include "access/sdir.h"
+#include "access/attnum.h"
+#include "access/skey.h"
+
+#include "fmgr.h"
+
+/*
+ * ScanKeyEntryIsLegal --
+ * True iff the scan key entry is legal.
+ */
+/* NOTE: macro evaluates `entry' more than once - no side effects in args */
+#define ScanKeyEntryIsLegal(entry) \
+ ((bool) (AssertMacro(PointerIsValid(entry)) && \
+ AttributeNumberIsValid(entry->sk_attno)))
+
+/*
+ * ScanKeyEntrySetIllegal --
+ * Marks a scan key entry as illegal.
+ */
+void
+ScanKeyEntrySetIllegal(ScanKey entry)
+{
+
+ Assert(PointerIsValid(entry));
+
+ /* InvalidAttrNumber is what makes ScanKeyEntryIsLegal() return false;
+ * the flag/procedure resets are belt-and-braces */
+ entry->sk_flags = 0; /* just in case... */
+ entry->sk_attno = InvalidAttrNumber;
+ entry->sk_procedure = 0; /* should be InvalidRegProcedure */
+}
+
+/*
+ * ScanKeyEntryInitialize --
+ * Initializes a scan key entry.
+ *
+ * Note:
+ * Assumes the scan key entry is valid.
+ * Assumes the initialized scan key entry will be legal.
+ */
+void
+ScanKeyEntryInitialize(ScanKey entry,
+ bits16 flags,
+ AttrNumber attributeNumber,
+ RegProcedure procedure,
+ Datum argument)
+{
+ Assert(PointerIsValid(entry));
+
+ entry->sk_flags = flags;
+ entry->sk_attno = attributeNumber;
+ entry->sk_procedure = procedure;
+ entry->sk_argument = argument;
+ /* resolve the procedure into sk_func/sk_nargs now - presumably so
+ * the scan can call it directly later; confirm against fmgr_info */
+ fmgr_info(procedure, &entry->sk_func, &entry->sk_nargs);
+
+ Assert(ScanKeyEntryIsLegal(entry));
+}
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * tupdesc.c--
+ * POSTGRES tuple descriptor support code
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $Header$
+ *
+ * NOTES
+ * some of the executor utility code such as "ExecTypeFromTL" should be
+ * moved here.
+ *
+ *-------------------------------------------------------------------------
+ */
+#include <stdio.h> /* for sprintf() */
+#include <ctype.h>
+#include <string.h>
+
+#include "postgres.h"
+
+#include "nodes/pg_list.h"
+#include "nodes/parsenodes.h"
+
+#include "access/attnum.h"
+#include "access/htup.h"
+#include "access/tupdesc.h"
+
+#include "utils/builtins.h"
+#include "utils/elog.h" /* XXX generate exceptions instead */
+#include "utils/palloc.h"
+
+#include "utils/syscache.h"
+#include "catalog/pg_type.h"
+
+#include "nodes/primnodes.h"
+
+#include "parser/catalog_utils.h"
+
+/* ----------------------------------------------------------------
+ * CreateTemplateTupleDesc
+ *
+ * This function allocates and zeros a tuple descriptor structure.
+ * ----------------------------------------------------------------
+ */
+TupleDesc
+CreateTemplateTupleDesc(int natts)
+{
+ uint32 size;
+ TupleDesc desc;
+
+ /* ----------------
+ * sanity checks
+ * ----------------
+ */
+ AssertArg(natts >= 1);
+
+ /* ----------------
+ * allocate enough memory for the tuple descriptor and
+ * zero it as TupleDescInitEntry assumes that the descriptor
+ * is filled with NULL pointers.
+ * (size covers natts attribute *pointers*; the per-attribute
+ * structs themselves are palloc'd later by TupleDescInitEntry)
+ * ----------------
+ */
+ size = natts * sizeof (AttributeTupleForm);
+ desc = (TupleDesc) palloc(sizeof(struct tupleDesc));
+ desc->attrs = (AttributeTupleForm*) palloc(size);
+ memset(desc->attrs, 0, size);
+
+ desc->natts = natts;
+
+ return (desc);
+}
+
+/* ----------------------------------------------------------------
+ * CreateTupleDesc
+ *
+ * This function allocates a new TupleDesc from AttributeTupleForm array
+ * ----------------------------------------------------------------
+ */
+TupleDesc
+CreateTupleDesc(int natts, AttributeTupleForm* attrs)
+{
+ TupleDesc desc;
+
+ /* ----------------
+ * sanity checks
+ * ----------------
+ */
+ AssertArg(natts >= 1);
+
+ /* NB: the attrs array is adopted (not copied) - the caller must not
+ * free or reuse it after this call */
+ desc = (TupleDesc) palloc(sizeof(struct tupleDesc));
+ desc->attrs = attrs;
+ desc->natts = natts;
+
+
+ return (desc);
+}
+
+/* ----------------------------------------------------------------
+ * CreateTupleDescCopy
+ *
+ * This function creates a new TupleDesc by copying from an existing
+ * TupleDesc
+ *
+ * ----------------------------------------------------------------
+ */
+TupleDesc
+CreateTupleDescCopy(TupleDesc tupdesc)
+{
+ TupleDesc desc;
+ int i, size;
+
+ desc = (TupleDesc) palloc(sizeof(struct tupleDesc));
+ desc->natts = tupdesc->natts;
+ size = desc->natts * sizeof (AttributeTupleForm);
+ desc->attrs = (AttributeTupleForm*) palloc(size);
+ /* deep copy: each attribute struct is separately palloc'd and
+ * memmove'd, so the copy is independent of the original */
+ for (i=0;i<desc->natts;i++) {
+ desc->attrs[i] =
+ (AttributeTupleForm)palloc(ATTRIBUTE_TUPLE_SIZE);
+ memmove(desc->attrs[i],
+ tupdesc->attrs[i],
+ ATTRIBUTE_TUPLE_SIZE);
+ }
+ return desc;
+}
+
+/* ----------------------------------------------------------------
+ * TupleDescInitEntry
+ *
+ * This function initializes a single attribute structure in
+ * a preallocated tuple descriptor.
+ * ----------------------------------------------------------------
+ */
+/* Returns true when the type was found in the catalogs and the entry is
+ * fully initialized; false when the type lookup failed (entry is filled
+ * with dummy values and the caller must fix it up later - see the note
+ * on self-referential relations below). */
+bool
+TupleDescInitEntry(TupleDesc desc,
+ AttrNumber attributeNumber,
+ char *attributeName,
+ char *typeName,
+ int attdim,
+ bool attisset)
+{
+ HeapTuple tuple;
+ TypeTupleForm typeForm;
+ AttributeTupleForm att;
+
+ /* ----------------
+ * sanity checks
+ * ----------------
+ */
+ AssertArg(PointerIsValid(desc));
+ AssertArg(attributeNumber >= 1);
+ /* attributeName's are sometimes NULL,
+ from resdom's. I don't know why that is, though -- Jolly */
+/* AssertArg(NameIsValid(attributeName));*/
+/* AssertArg(NameIsValid(typeName));*/
+
+ /* slot must still be NULL, as zeroed by CreateTemplateTupleDesc */
+ AssertArg(!PointerIsValid(desc->attrs[attributeNumber - 1]));
+
+
+ /* ----------------
+ * allocate storage for this attribute
+ * ----------------
+ */
+
+ att = (AttributeTupleForm) palloc(ATTRIBUTE_TUPLE_SIZE);
+ desc->attrs[attributeNumber - 1] = att;
+
+ /* ----------------
+ * initialize some of the attribute fields
+ * ----------------
+ */
+ att->attrelid = 0; /* dummy value */
+
+ if (attributeName != NULL)
+ namestrcpy(&(att->attname), attributeName);
+ else
+ memset(att->attname.data,0,NAMEDATALEN);
+
+
+ att->attdefrel = 0; /* dummy value */
+ att->attnvals = 0; /* dummy value */
+ att->atttyparg = 0; /* dummy value */
+ att->attbound = 0; /* dummy value */
+ att->attcanindex = 0; /* dummy value */
+ att->attproc = 0; /* dummy value */
+ att->attcacheoff = -1;
+
+ att->attnum = attributeNumber;
+ att->attnelems = attdim;
+ att->attisset = attisset;
+
+ /* ----------------
+ * search the system cache for the type tuple of the attribute
+ * we are creating so that we can get the typeid and some other
+ * stuff.
+ *
+ * Note: in the special case of
+ *
+ * create EMP (name = char16, manager = EMP)
+ *
+ * RelationNameCreateHeapRelation() calls BuildDesc() which
+ * calls this routine and since EMP does not exist yet, the
+ * system cache lookup below fails. That's fine, but rather
+ * then doing a elog(WARN) we just leave that information
+ * uninitialized, return false, then fix things up later.
+ * -cim 6/14/90
+ * ----------------
+ */
+ tuple = SearchSysCacheTuple(TYPNAME, PointerGetDatum(typeName),
+ 0,0,0);
+ if (! HeapTupleIsValid(tuple)) {
+ /* ----------------
+ * here type info does not exist yet so we just fill
+ * the attribute with dummy information and return false.
+ * ----------------
+ */
+ att->atttypid = InvalidOid;
+ att->attlen = (int16) 0;
+ att->attbyval = (bool) 0;
+ att->attalign = 'i';
+ return false;
+ }
+
+ /* ----------------
+ * type info exists so we initialize our attribute
+ * information from the type tuple we found..
+ * ----------------
+ */
+ typeForm = (TypeTupleForm) GETSTRUCT(tuple);
+
+ att->atttypid = tuple->t_oid;
+ att->attalign = typeForm->typalign;
+
+ /* ------------------------
+ If this attribute is a set, what is really stored in the
+ attribute is the OID of a tuple in the pg_proc catalog.
+ The pg_proc tuple contains the query string which defines
+ this set - i.e., the query to run to get the set.
+ So the atttypid (just assigned above) refers to the type returned
+ by this query, but the actual length of this attribute is the
+ length (size) of an OID.
+
+ Why not just make the atttypid point to the OID type, instead
+ of the type the query returns? Because the executor uses the atttypid
+ to tell the front end what type will be returned (in BeginCommand),
+ and in the end the type returned will be the result of the query, not
+ an OID.
+
+ Why not wait until the return type of the set is known (i.e., the
+ recursive call to the executor to execute the set has returned)
+ before telling the front end what the return type will be? Because
+ the executor is a delicate thing, and making sure that the correct
+ order of front-end commands is maintained is messy, especially
+ considering that target lists may change as inherited attributes
+ are considered, etc. Ugh.
+ -----------------------------------------
+ */
+ if (attisset) {
+ Type t = type("oid");
+ att->attlen = tlen(t);
+ att->attbyval = tbyval(t);
+ } else {
+ att->attlen = typeForm->typlen;
+ att->attbyval = typeForm->typbyval;
+ }
+
+
+ return true;
+}
+
+
+/* ----------------------------------------------------------------
+ * TupleDescMakeSelfReference
+ *
+ * This function initializes a "self-referential" attribute like
+ * manager in "create EMP (name=text, manager = EMP)".
+ * It calls TypeShellMake() which inserts a "shell" type
+ * tuple into pg_type. A self-reference is one kind of set, so
+ * its size and byval are the same as for a set. See the comments
+ * above in TupleDescInitEntry.
+ * ----------------------------------------------------------------
+ */
+static void
+TupleDescMakeSelfReference(TupleDesc desc,
+ AttrNumber attnum,
+ char *relname)
+{
+ AttributeTupleForm att;
+ Type t = type("oid");
+
+ /* insert a shell pg_type tuple for the not-yet-existing relation
+ * and store its OID; length/byval are those of an OID, exactly as
+ * for a set attribute in TupleDescInitEntry */
+ att = desc->attrs[attnum-1];
+ att->atttypid = TypeShellMake(relname);
+ att->attlen = tlen(t);
+ att->attbyval = tbyval(t);
+ att->attnelems = 0;
+}
+
+/* ----------------------------------------------------------------
+ * BuildDescForRelation
+ *
+ * This is a general purpose function identical to BuildDesc
+ * but is used by the DefineRelation() code to catch the
+ * special case where you
+ *
+ * create FOO ( ..., x = FOO )
+ *
+ * here, the initial type lookup for "x = FOO" will fail
+ * because FOO isn't in the catalogs yet. But since we
+ * are creating FOO, instead of doing an elog() we add
+ * a shell type tuple to pg_type and fix things later
+ * in amcreate().
+ * ----------------------------------------------------------------
+ */
+TupleDesc
+BuildDescForRelation(List *schema, char *relname)
+{
+ int natts;
+ AttrNumber attnum;
+ List *p;
+ TupleDesc desc;
+ char *attname;
+ char *typename;
+ int attdim;
+ bool attisset;
+
+ /* ----------------
+ * allocate a new tuple descriptor
+ * ----------------
+ */
+ natts = length(schema);
+ desc = CreateTemplateTupleDesc(natts);
+
+ attnum = 0;
+
+ /* room for "_" + up to NAMEDATALEN name chars + NUL. The previous
+ * code palloc'd NAMEDATALEN+1 and built array names in a 20-byte
+ * stack buffer, both of which overflow for long type names. */
+ typename = palloc(NAMEDATALEN+2);
+
+ foreach(p, schema) {
+ ColumnDef *entry;
+ List *arry;
+
+ /* ----------------
+ * for each entry in the list, get the name and type
+ * information from the list and have TupleDescInitEntry
+ * fill in the attribute information we need.
+ * ----------------
+ */
+ attnum++;
+
+ entry = lfirst(p);
+ attname = entry->colname;
+ arry = entry->typename->arrayBounds;
+ attisset = entry->typename->setof;
+
+ if (arry != NIL) {
+ attdim = length(arry);
+
+ /* array of XXX is _XXX (inherited from release 3);
+ * %.*s bounds the copied name to NAMEDATALEN bytes */
+ sprintf(typename, "_%.*s", NAMEDATALEN, entry->typename->name);
+ } else {
+ /* bounded copy, in case the parser hands us an overlong name */
+ strncpy(typename, entry->typename->name, NAMEDATALEN);
+ typename[NAMEDATALEN] = '\0';
+ attdim = 0;
+ }
+
+ if (! TupleDescInitEntry(desc, attnum, attname,
+ typename, attdim, attisset)) {
+ /* ----------------
+ * if TupleDescInitEntry() fails, it means there is
+ * no type in the system catalogs. So now we check if
+ * the type name equals the relation name. If so we
+ * have a self reference, otherwise it's an error.
+ * ----------------
+ */
+ if (!strcmp(typename, relname)) {
+ TupleDescMakeSelfReference(desc, attnum, relname);
+ } else
+ elog(WARN, "DefineRelation: no such type %.*s",
+ NAMEDATALEN, typename);
+ }
+
+ /*
+ * this is for char() and varchar(). When an entry is of type
+ * char() or varchar(), typlen is set to the appropriate length,
+ * which we'll use here instead. (The catalog lookup only returns
+ * the length of bpchar and varchar which is not what we want!)
+ * - ay 6/95
+ */
+ if (entry->typename->typlen > 0) {
+ desc->attrs[attnum - 1]->attlen = entry->typename->typlen;
+ }
+ }
+ return desc;
+}
+
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * funcindex.h--
+ *
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id$
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef _FUNC_INDEX_INCLUDED_
+#define _FUNC_INDEX_INCLUDED_
+
+#include "postgres.h"
+
+/* Describes a functional index: the function applied to the indexed
+ * attributes, its argument types (at most 8), and its pg_proc OID. */
+typedef struct {
+ int nargs; /* number of function arguments */
+ Oid arglist[8]; /* argument type OIDs */
+ Oid procOid; /* pg_proc OID of the index function */
+ NameData funcName; /* name of the index function */
+} FuncIndexInfo;
+
+typedef FuncIndexInfo *FuncIndexInfoPtr;
+
+/*
+ * some marginally useful macro definitions
+ */
+/* #define FIgetname(FINFO) (&((FINFO)->funcName.data[0]))*/
+#define FIgetname(FINFO) (FINFO)->funcName.data
+#define FIgetnArgs(FINFO) (FINFO)->nargs
+#define FIgetProcOid(FINFO) (FINFO)->procOid
+#define FIgetArg(FINFO, argnum) (FINFO)->arglist[argnum]
+#define FIgetArglist(FINFO) (FINFO)->arglist
+
+#define FIsetnArgs(FINFO, numargs) ((FINFO)->nargs = numargs)
+#define FIsetProcOid(FINFO, id) ((FINFO)->procOid = id)
+#define FIsetArg(FINFO, argnum, argtype) ((FINFO)->arglist[argnum] = argtype)
+
+/* a non-functional index has an invalid procOid */
+#define FIisFunctionalIndex(FINFO) (FINFO->procOid != InvalidOid)
+
+#endif /* _FUNC_INDEX_INCLUDED_ */
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * genam.h--
+ * POSTGRES general access method definitions.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id$
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef GENAM_H
+#define GENAM_H
+
+#include "postgres.h"
+
+#include "access/attnum.h"
+#include "access/htup.h"
+#include "access/istrat.h"
+#include "access/itup.h"
+#include "access/relscan.h"
+#include "access/skey.h"
+#include "access/sdir.h"
+#include "access/funcindex.h"
+
+/* ----------------
+ * generalized index_ interface routines
+ * ----------------
+ */
+extern Relation index_open(Oid relationId);
+extern Relation index_openr(char *relationName);
+extern void index_close(Relation relation);
+extern InsertIndexResult index_insert(Relation relation,
+ IndexTuple indexTuple);
+extern void index_delete(Relation relation, ItemPointer indexItem);
+extern IndexScanDesc index_beginscan(Relation relation, bool scanFromEnd,
+ uint16 numberOfKeys, ScanKey key);
+extern void index_rescan(IndexScanDesc scan, bool scanFromEnd, ScanKey key);
+extern void index_endscan(IndexScanDesc scan);
+extern void index_markpos(IndexScanDesc scan);
+extern void index_restrpos(IndexScanDesc scan);
+extern RetrieveIndexResult index_getnext(IndexScanDesc scan,
+ ScanDirection direction);
+extern RegProcedure index_getprocid(Relation irel, AttrNumber attnum,
+ uint16 procnum);
+extern Datum GetIndexValue(HeapTuple tuple, TupleDesc hTupDesc,
+ int attOff, AttrNumber attrNums[], FuncIndexInfo *fInfo,
+ bool *attNull, Buffer buffer);
+
+/* in genam.c */
+extern IndexScanDesc RelationGetIndexScan(Relation relation, bool scanFromEnd,
+ uint16 numberOfKeys, ScanKey key);
+extern void IndexScanRestart(IndexScanDesc scan, bool scanFromEnd,
+ ScanKey key);
+extern void IndexScanEnd(IndexScanDesc scan);
+extern void IndexScanMarkPosition(IndexScanDesc scan);
+extern void IndexScanRestorePosition(IndexScanDesc scan);
+
+#endif /* GENAM_H */
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * hash.h--
+ * header file for postgres hash access method implementation
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id$
+ *
+ * NOTES
+ * modeled after Margo Seltzer's hash implementation for unix.
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef HASH_H
+#define HASH_H
+
+#include "access/itup.h"
+
+/*
+ * An overflow page is a spare page allocated for storing data whose
+ * bucket doesn't have room to store it. We use overflow pages rather
+ * than just splitting the bucket because there is a linear order in
+ * the way we split buckets. In other words, if there isn't enough space
+ * in the bucket itself, put it in an overflow page.
+ *
+ * Overflow page addresses are stored in form: (Splitnumber, Page offset).
+ *
+ * A splitnumber is the number of the generation where the table doubles
+ * in size. The ovflpage's offset within the splitnumber; offsets start
+ * at 1.
+ *
+ * We convert the stored bitmap address into a page address with the
+ * macro OADDR_OF(S, O) where S is the splitnumber and O is the page
+ * offset.
+ */
+typedef uint32 Bucket;
+typedef bits16 OverflowPageAddress;
+typedef uint32 SplitNumber;
+typedef uint32 PageOffset;
+
+/* A valid overflow address will always have a page offset >= 1 */
+#define InvalidOvflAddress 0
+
+/* An OverflowPageAddress packs (SplitNumber, PageOffset):
+ * upper 5 bits are the splitnumber, lower 11 the page offset. */
+#define SPLITSHIFT 11
+#define SPLITMASK 0x7FF
+#define SPLITNUM(N) ((SplitNumber)(((uint32)(N)) >> SPLITSHIFT))
+#define OPAGENUM(N) ((PageOffset)((N) & SPLITMASK))
+#define OADDR_OF(S,O) ((OverflowPageAddress)((uint32)((uint32)(S) << SPLITSHIFT) + (O)))
+
+/* NB: relies on a variable `metap' (HashMetaPage) at the call site */
+#define BUCKET_TO_BLKNO(B) \
+ ((Bucket) ((B) + ((B) ? metap->SPARES[_hash_log2((B)+1)-1] : 0)) + 1)
+/* bug fix: dropped the stray trailing semicolon, which made the macro
+ * unusable inside expressions */
+#define OADDR_TO_BLKNO(B) \
+ ((BlockNumber) \
+ (BUCKET_TO_BLKNO ( (1 << SPLITNUM((B))) -1 ) + OPAGENUM((B))))
+
+/*
+ * hasho_flag tells us which type of page we're looking at. For
+ * example, knowing overflow pages from bucket pages is necessary
+ * information when you're deleting tuples from a page. If all the
+ * tuples are deleted from an overflow page, the overflow is made
+ * available to other buckets by calling _hash_freeovflpage(). If all
+ * the tuples are deleted from a bucket page, no additional action is
+ * necessary.
+ */
+
+#define LH_UNUSED_PAGE (0)
+#define LH_OVERFLOW_PAGE (1 << 0)
+#define LH_BUCKET_PAGE (1 << 1)
+#define LH_BITMAP_PAGE (1 << 2)
+#define LH_META_PAGE (1 << 3)
+
+typedef struct HashPageOpaqueData {
+ bits16 hasho_flag; /* is this page a bucket or ovfl */
+ Bucket hasho_bucket; /* bucket number this pg belongs to */
+ OverflowPageAddress hasho_oaddr; /* ovfl address of this ovfl pg */
+ BlockNumber hasho_nextblkno; /* next ovfl blkno */
+ BlockNumber hasho_prevblkno; /* previous ovfl (or bucket) blkno */
+} HashPageOpaqueData;
+
+typedef HashPageOpaqueData *HashPageOpaque;
+
+/*
+ * ScanOpaqueData is used to remember which buffers we're currently
+ * examining in the scan. We keep these buffers locked and pinned and
+ * recorded in the opaque entry of the scan in order to avoid doing a
+ * ReadBuffer() for every tuple in the index. This avoids semop() calls,
+ * which are expensive.
+ */
+
+typedef struct HashScanOpaqueData {
+ Buffer hashso_curbuf;
+ Buffer hashso_mrkbuf;
+} HashScanOpaqueData;
+
+typedef HashScanOpaqueData *HashScanOpaque;
+
+/*
+ * Definitions for metapage.
+ */
+
+#define HASH_METAPAGE 0 /* metapage is always block 0 */
+
+#define HASH_MAGIC 0x6440640
+#define HASH_VERSION 0
+
+/*
+ * NCACHED is used to set the sizes of the spares[] and bitmaps[] arrays.
+ *
+ * Spares[] is used to hold the number overflow pages currently
+ * allocated at a certain splitpoint. For example, if spares[3] = 7
+ * then there are a maximum of 7 ovflpages available at splitpoint 3.
+ * The value in spares[] will change as ovflpages are added within
+ * a splitpoint.
+ *
+ * Within a splitpoint, one can find which ovflpages are available and
+ * which are used by looking at a bitmaps that are stored on the ovfl
+ * pages themselves. There is at least one bitmap for every splitpoint's
+ * ovflpages. Bitmaps[] contains the ovflpage addresses of the ovflpages
+ * that hold the ovflpage bitmaps.
+ *
+ * The reason that the size is restricted to NCACHED (32) is because
+ * the bitmaps are 16 bits: upper 5 represent the splitpoint, lower 11
+ * indicate the page number within the splitpoint. Since there are
+ * only 5 bits to store the splitpoint, there can only be 32 splitpoints.
+ * Both spares[] and bitmaps[] use splitpoints as their indices, so there
+ * can only be 32 of them.
+ */
+
+#define NCACHED 32
+
+
+typedef struct HashMetaPageData {
+ PageHeaderData hashm_phdr; /* pad for page header
+ (do not use) */
+ uint32 hashm_magic; /* magic no. for hash tables */
+ uint32 hashm_version; /* version ID */
+ uint32 hashm_nkeys; /* number of keys stored in
+ the table */
+ uint16 hashm_ffactor; /* fill factor */
+ uint16 hashm_bsize; /* bucket size (bytes) -
+ must be a power of 2 */
+ uint16 hashm_bshift; /* bucket shift */
+ uint16 hashm_bmsize; /* bitmap array size (bytes) -
+ must be a power of 2 */
+ uint32 hashm_maxbucket; /* ID of maximum bucket
+ in use */
+ uint32 hashm_highmask; /* mask to modulo into
+ entire table */
+ uint32 hashm_lowmask; /* mask to modulo into lower
+ half of table */
+ uint32 hashm_ovflpoint; /* pageno. from which ovflpgs
+ being allocated */
+ uint32 hashm_lastfreed; /* last ovflpage freed */
+ uint32 hashm_nmaps; /* Initial number of bitmaps */
+ uint32 hashm_spares[NCACHED]; /* spare pages available at
+ splitpoints */
+ BlockNumber hashm_mapp[NCACHED]; /* blknumbers of ovfl page
+ maps */
+ RegProcedure hashm_procid; /* hash procedure id from
+ pg_proc */
+} HashMetaPageData;
+
+typedef HashMetaPageData *HashMetaPage;
+
+/* Short hands for accessing structure */
+#define BSHIFT hashm_bshift
+#define OVFL_POINT hashm_ovflpoint
+#define LAST_FREED hashm_lastfreed
+#define MAX_BUCKET hashm_maxbucket
+#define FFACTOR hashm_ffactor
+#define HIGH_MASK hashm_highmask
+#define LOW_MASK hashm_lowmask
+#define NKEYS hashm_nkeys
+#define SPARES hashm_spares
+
+extern bool BuildingHash;
+
+typedef struct HashItemData {
+ IndexTupleData hash_itup;
+} HashItemData;
+
+typedef HashItemData *HashItem;
+
+/*
+ * Constants
+ */
+#define DEFAULT_FFACTOR 300
+#define SPLITMAX 8
+#define BYTE_TO_BIT 3 /* 2^3 bits/byte */
+#define INT_TO_BYTE 2 /* 2^2 bytes/int */
+#define INT_TO_BIT 5 /* 2^5 bits/int */
+#define ALL_SET ((uint32) ~0)
+
+/*
+ * bitmap pages do not contain tuples. they do contain the standard
+ * page headers and trailers; however, everything in between is a
+ * giant bit array. the number of bits that fit on a page obviously
+ * depends on the page size and the header/trailer overhead.
+ */
+#define BMPGSZ_BYTE(metap) ((metap)->hashm_bmsize)
+#define BMPGSZ_BIT(metap) ((metap)->hashm_bmsize << BYTE_TO_BIT)
+#define HashPageGetBitmap(pg) \
+ ((uint32 *) (((char *) (pg)) + DOUBLEALIGN(sizeof(PageHeaderData))))
+
+/*
+ * The number of bits in an ovflpage bitmap which
+ * tells which ovflpages are empty versus in use (NOT the number of
+ * bits in an overflow page *address* bitmap).
+ */
+#define BITS_PER_MAP 32 /* Number of bits in ovflpage bitmap */
+
+/* Given the address of the beginning of a big map, clear/set the nth bit */
+#define CLRBIT(A, N) ((A)[(N)/BITS_PER_MAP] &= ~(1<<((N)%BITS_PER_MAP)))
+#define SETBIT(A, N) ((A)[(N)/BITS_PER_MAP] |= (1<<((N)%BITS_PER_MAP)))
+#define ISSET(A, N) ((A)[(N)/BITS_PER_MAP] & (1<<((N)%BITS_PER_MAP)))
+
+/*
+ * page locking modes
+ */
+#define HASH_READ 0
+#define HASH_WRITE 1
+
+/*
+ * In general, the hash code tries to localize its knowledge about page
+ * layout to a couple of routines. However, we need a special value to
+ * indicate "no page number" in those places where we expect page numbers.
+ */
+
+#define P_NONE 0
+
+/*
+ * Strategy number. There's only one valid strategy for hashing: equality.
+ */
+
+#define HTEqualStrategyNumber 1
+#define HTMaxStrategyNumber 1
+
+/*
+ * When a new operator class is declared, we require that the user supply
+ * us with an amproc procedure for hashing a key of the new type.
+ * Since we only have one such proc in amproc, it's number 1.
+ */
+
+#define HASHPROC 1
+
+/* public routines */
+
+extern void hashbuild(Relation heap, Relation index, int natts,
+ AttrNumber *attnum, IndexStrategy istrat, uint16 pcount,
+ Datum *params, FuncIndexInfo *finfo, PredInfo *predInfo);
+extern InsertIndexResult hashinsert(Relation rel, IndexTuple itup);
+extern char *hashgettuple(IndexScanDesc scan, ScanDirection dir);
+extern char *hashbeginscan(Relation rel, bool fromEnd, uint16 keysz,
+ ScanKey scankey);
+extern void hashrescan(IndexScanDesc scan, bool fromEnd, ScanKey scankey);
+extern void hashendscan(IndexScanDesc scan);
+extern void hashmarkpos(IndexScanDesc scan);
+extern void hashrestrpos(IndexScanDesc scan);
+extern void hashdelete(Relation rel, ItemPointer tid);
+
+/* hashfunc.c */
+extern uint32 hashint2(int16 key);
+extern uint32 hashint4(uint32 key);
+extern uint32 hashfloat4(float32 keyp);
+extern uint32 hashfloat8(float64 keyp);
+extern uint32 hashoid(Oid key);
+extern uint32 hashchar(char key);
+extern uint32 hashchar2(uint16 intkey);
+extern uint32 hashchar4(uint32 intkey);
+extern uint32 hashchar8(char *key);
+extern uint32 hashchar16(char *key);
+extern uint32 hashtext(struct varlena *key);
+
+/* private routines */
+
+/* hashinsert.c */
+extern InsertIndexResult _hash_doinsert(Relation rel, HashItem hitem);
+
+
+/* hashovfl.c */
+extern Buffer _hash_addovflpage(Relation rel, Buffer *metabufp, Buffer buf);
+extern Buffer _hash_freeovflpage(Relation rel, Buffer ovflbuf);
+extern int32 _hash_initbitmap(Relation rel, HashMetaPage metap, int32 pnum,
+ int32 nbits, int32 ndx);
+extern void _hash_squeezebucket(Relation rel, HashMetaPage metap,
+ Bucket bucket);
+
+
+/* hashpage.c */
+extern void _hash_metapinit(Relation rel);
+extern Buffer _hash_getbuf(Relation rel, BlockNumber blkno, int access);
+extern void _hash_relbuf(Relation rel, Buffer buf, int access);
+extern void _hash_wrtbuf(Relation rel, Buffer buf);
+extern void _hash_wrtnorelbuf(Relation rel, Buffer buf);
+extern Page _hash_chgbufaccess(Relation rel, Buffer *bufp, int from_access,
+ int to_access);
+extern void _hash_pageinit(Page page, Size size);
+extern void _hash_pagedel(Relation rel, ItemPointer tid);
+extern void _hash_expandtable(Relation rel, Buffer metabuf);
+
+
+/* hashscan.c */
+extern void _hash_regscan(IndexScanDesc scan);
+extern void _hash_dropscan(IndexScanDesc scan);
+extern void _hash_adjscans(Relation rel, ItemPointer tid);
+
+
+/* hashsearch.c */
+extern void _hash_search(Relation rel, int keysz, ScanKey scankey,
+ Buffer *bufP, HashMetaPage metap);
+extern RetrieveIndexResult _hash_next(IndexScanDesc scan, ScanDirection dir);
+extern RetrieveIndexResult _hash_first(IndexScanDesc scan, ScanDirection dir);
+extern bool _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir,
+ Buffer metabuf);
+
+
+/* hashstrat.c */
+extern StrategyNumber _hash_getstrat(Relation rel, AttrNumber attno,
+ RegProcedure proc);
+extern bool _hash_invokestrat(Relation rel, AttrNumber attno,
+ StrategyNumber strat, Datum left, Datum right);
+
+
+/* hashutil.c */
+extern ScanKey _hash_mkscankey(Relation rel, IndexTuple itup,
+ HashMetaPage metap);
+extern void _hash_freeskey(ScanKey skey);
+extern bool _hash_checkqual(IndexScanDesc scan, IndexTuple itup);
+extern HashItem _hash_formitem(IndexTuple itup);
+extern Bucket _hash_call(Relation rel, HashMetaPage metap, Datum key);
+extern uint32 _hash_log2(uint32 num);
+extern void _hash_checkpage(Page page, int flags);
+
+#endif /* HASH_H */
--- /dev/null
+#-------------------------------------------------------------------------
+#
+# Makefile.inc--
+# Makefile for access/hash (hash access method)
+#
+# Copyright (c) 1994, Regents of the University of California
+#
+#
+# IDENTIFICATION
+# $Header$
+#
+#-------------------------------------------------------------------------
+
+# Appended (+=) to the SUBSRCS list accumulated by the including Makefile;
+# these sources are compiled as part of the backend build, not standalone.
+SUBSRCS+= hash.c hashfunc.c hashinsert.c hashovfl.c hashpage.c hashscan.c \
+ hashsearch.c hashstrat.c hashutil.c
+
+
+
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * hash.c--
+ * Implementation of Margo Seltzer's Hashing package for postgres.
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $Header$
+ *
+ * NOTES
+ * This file contains only the public interface routines.
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "storage/bufmgr.h"
+#include "storage/bufpage.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/rel.h"
+#include "utils/excid.h"
+#include "access/heapam.h"
+#include "access/genam.h"
+#include "access/sdir.h"
+#include "access/hash.h"
+#include "access/funcindex.h"
+#include "nodes/execnodes.h"
+#include "nodes/plannodes.h"
+#include "executor/executor.h"
+#include "executor/tuptable.h"
+#include "catalog/index.h"
+
+
+/* True while hashbuild() is running; consulted to skip high-concurrency
+ * locking, since an index under construction is not yet visible to others. */
+bool BuildingHash = false;
+
+/*
+ * hashbuild() -- build a new hash index.
+ *
+ * We use a global variable to record the fact that we're creating
+ * a new index. This is used to avoid high-concurrency locking,
+ * since the index won't be visible until this transaction commits
+ * and since building is guaranteed to be single-threaded.
+ *
+ * heap/index are the open heap and index relations; attnum identifies
+ * the indexed heap attributes; finfo carries functional-index info and
+ * predInfo carries partial-index predicates (pred/oldPred).
+ */
+void
+hashbuild(Relation heap,
+ Relation index,
+ int natts,
+ AttrNumber *attnum,
+ IndexStrategy istrat,
+ uint16 pcount,
+ Datum *params,
+ FuncIndexInfo *finfo,
+ PredInfo *predInfo)
+{
+ HeapScanDesc hscan;
+ Buffer buffer;
+ HeapTuple htup;
+ IndexTuple itup;
+ TupleDesc htupdesc, itupdesc;
+ Datum *attdata;
+ bool *nulls;
+ InsertIndexResult res;
+ int nhtups, nitups;
+ int i;
+ HashItem hitem;
+ ExprContext *econtext;
+ TupleTable tupleTable;
+ TupleTableSlot *slot;
+ Oid hrelid, irelid;
+ Node *pred, *oldPred;
+
+ /* note that this is a new hash index being built */
+ BuildingHash = true;
+
+ pred = predInfo->pred;
+ oldPred = predInfo->oldPred;
+
+ /* initialize the hash index metadata page (if this is a new index) */
+ if (oldPred == NULL)
+ _hash_metapinit(index);
+
+ /* get tuple descriptors for heap and index relations */
+ htupdesc = RelationGetTupleDescriptor(heap);
+ itupdesc = RelationGetTupleDescriptor(index);
+
+ /* get space for data items that'll appear in the index tuple */
+ attdata = (Datum *) palloc(natts * sizeof(Datum));
+ nulls = (bool *) palloc(natts * sizeof(bool));
+
+ /*
+ * If this is a predicate (partial) index, we will need to evaluate the
+ * predicate using ExecQual, which requires the current tuple to be in a
+ * slot of a TupleTable. In addition, ExecQual must have an ExprContext
+ * referring to that slot. Here, we initialize dummy TupleTable and
+ * ExprContext objects for this purpose. --Nels, Feb '92
+ */
+#ifndef OMIT_PARTIAL_INDEX
+ if (pred != NULL || oldPred != NULL) {
+ tupleTable = ExecCreateTupleTable(1);
+ slot = ExecAllocTableSlot(tupleTable);
+ econtext = makeNode(ExprContext);
+ /* NOTE(review): 'buffer' is uninitialized at this point -- confirm
+ * FillDummyExprContext only stores it and nothing reads it before
+ * the heap scan below assigns it. */
+ FillDummyExprContext(econtext, slot, htupdesc, buffer);
+ }
+#endif /* OMIT_PARTIAL_INDEX */
+
+ /* start a heap scan */
+ hscan = heap_beginscan(heap, 0, NowTimeQual, 0, (ScanKey) NULL);
+ htup = heap_getnext(hscan, 0, &buffer);
+
+ /* build the index */
+ nhtups = nitups = 0;
+
+ for (; HeapTupleIsValid(htup); htup = heap_getnext(hscan, 0, &buffer)) {
+
+ nhtups++;
+
+ /*
+ * If oldPred != NULL, this is an EXTEND INDEX command, so skip
+ * this tuple if it was already in the existing partial index
+ */
+ if (oldPred != NULL) {
+ /*SetSlotContents(slot, htup); */
+#ifndef OMIT_PARTIAL_INDEX
+ slot->val = htup;
+ if (ExecQual((List*)oldPred, econtext) == true) {
+ nitups++;
+ continue;
+ }
+#endif /* OMIT_PARTIAL_INDEX */
+ }
+
+ /* Skip this tuple if it doesn't satisfy the partial-index predicate */
+ if (pred != NULL) {
+#ifndef OMIT_PARTIAL_INDEX
+ /*SetSlotContents(slot, htup); */
+ slot->val = htup;
+ if (ExecQual((List*)pred, econtext) == false)
+ continue;
+#endif /* OMIT_PARTIAL_INDEX */
+}
+
+ nitups++;
+
+ /*
+ * For the current heap tuple, extract all the attributes
+ * we use in this index, and note which are null.
+ */
+ for (i = 1; i <= natts; i++) {
+ int attoff;
+ bool attnull;
+
+ /*
+ * Offsets are from the start of the tuple, and are
+ * zero-based; indices are one-based. The next call
+ * returns i - 1. That's data hiding for you.
+ */
+
+ /* attoff = i - 1 */
+ attoff = AttrNumberGetAttrOffset(i);
+
+ /* below, attdata[attoff] set to equal some datum &
+ * attnull is changed to indicate whether or not the attribute
+ * is null for this tuple
+ */
+ attdata[attoff] = GetIndexValue(htup,
+ htupdesc,
+ attoff,
+ attnum,
+ finfo,
+ &attnull,
+ buffer);
+ /* NOTE(review): nulls[] is declared bool* but stores the 'n'/' '
+ * character convention -- confirm index_formtuple expects this. */
+ nulls[attoff] = (attnull ? 'n' : ' ');
+ }
+
+ /* form an index tuple and point it at the heap tuple */
+ itup = index_formtuple(itupdesc, attdata, nulls);
+
+ /*
+ * If the single index key is null, we don't insert it into
+ * the index. Hash tables support scans on '='.
+ * Relational algebra says that A = B
+ * returns null if either A or B is null. This
+ * means that no qualification used in an index scan could ever
+ * return true on a null attribute. It also means that indices
+ * can't be used by ISNULL or NOTNULL scans, but that's an
+ * artifact of the strategy map architecture chosen in 1986, not
+ * of the way nulls are handled here.
+ */
+
+ if (itup->t_info & INDEX_NULL_MASK) {
+ pfree(itup);
+ continue;
+ }
+
+ itup->t_tid = htup->t_ctid;
+ hitem = _hash_formitem(itup);
+ res = _hash_doinsert(index, hitem);
+ pfree(hitem);
+ pfree(itup);
+ pfree(res);
+ }
+
+ /* okay, all heap tuples are indexed */
+ heap_endscan(hscan);
+
+ if (pred != NULL || oldPred != NULL) {
+#ifndef OMIT_PARTIAL_INDEX
+ ExecDestroyTupleTable(tupleTable, true);
+ pfree(econtext);
+#endif /* OMIT_PARTIAL_INDEX */
+ }
+
+ /*
+ * Since we just counted the tuples in the heap, we update its
+ * stats in pg_class to guarantee that the planner takes advantage
+ * of the index we just created. Finally, only update statistics
+ * during normal index definitions, not for indices on system catalogs
+ * created during bootstrap processing. We must close the relations
+ * before updating statistics to guarantee that the relcache entries
+ * are flushed when we increment the command counter in UpdateStats().
+ */
+ if (IsNormalProcessingMode())
+ {
+ hrelid = heap->rd_id;
+ irelid = index->rd_id;
+ heap_close(heap);
+ index_close(index);
+ UpdateStats(hrelid, nhtups, true);
+ UpdateStats(irelid, nitups, false);
+ if (oldPred != NULL) {
+ /* if the extension indexed every tuple, the predicate is moot */
+ if (nitups == nhtups) pred = NULL;
+ UpdateIndexPredicate(irelid, oldPred, pred);
+ }
+ }
+
+ /* be tidy */
+ pfree(nulls);
+ pfree(attdata);
+
+ /* all done */
+ BuildingHash = false;
+}
+
+/*
+ * hashinsert() -- insert an index tuple into a hash table.
+ *
+ * Hashes the tuple's key, locates the appropriate bucket page, places
+ * the tuple there, and hands an InsertIndexResult back to the caller.
+ * Tuples whose key is null are never stored (hash scans are '=' only,
+ * and '=' on null never succeeds), so NULL is returned for those.
+ */
+InsertIndexResult
+hashinsert(Relation rel, IndexTuple itup)
+{
+ InsertIndexResult result;
+ HashItem item;
+
+ /* null keys are not entered into the index at all */
+ if (itup->t_info & INDEX_NULL_MASK)
+ return ((InsertIndexResult) NULL);
+
+ item = _hash_formitem(itup);
+ result = _hash_doinsert(rel, item);
+ pfree(item);
+
+ return (result);
+}
+
+
+/*
+ * hashgettuple() -- Get the next tuple in the scan.
+ */
+char *
+hashgettuple(IndexScanDesc scan, ScanDirection dir)
+{
+ RetrieveIndexResult res;
+
+ /*
+ * If we've already initialized this scan, we can just advance it
+ * in the appropriate direction. If we haven't done so yet, we
+ * call a routine to get the first item in the scan.
+ */
+
+ if (ItemPointerIsValid(&(scan->currentItemData)))
+ res = _hash_next(scan, dir);
+ else
+ res = _hash_first(scan, dir);
+
+ return ((char *) res);
+}
+
+
+/*
+ * hashbeginscan() -- start a scan on a hash index
+ */
+char *
+hashbeginscan(Relation rel,
+ bool fromEnd,
+ uint16 keysz,
+ ScanKey scankey)
+{
+ IndexScanDesc scan;
+ HashScanOpaque so;
+
+ scan = RelationGetIndexScan(rel, fromEnd, keysz, scankey);
+ so = (HashScanOpaque) palloc(sizeof(HashScanOpaqueData));
+ so->hashso_curbuf = so->hashso_mrkbuf = InvalidBuffer;
+ scan->opaque = so;
+ scan->flags = 0x0;
+
+ /* register scan in case we change pages it's using */
+ _hash_regscan(scan);
+
+ return ((char *) scan);
+}
+
+/*
+ * hashrescan() -- rescan an index relation
+ *
+ * Drops the read locks held on the current and marked pages,
+ * invalidates both positions, and installs the new scan key (if any).
+ */
+void
+hashrescan(IndexScanDesc scan, bool fromEnd, ScanKey scankey)
+{
+ HashScanOpaque opaque = (HashScanOpaque) scan->opaque;
+ ItemPointer current = &(scan->currentItemData);
+ ItemPointer mark = &(scan->currentMarkData);
+
+ /* we hold a read lock on the current page in the scan */
+ if (ItemPointerIsValid(current)) {
+ _hash_relbuf(scan->relation, opaque->hashso_curbuf, HASH_READ);
+ opaque->hashso_curbuf = InvalidBuffer;
+ ItemPointerSetInvalid(current);
+ }
+
+ /* and possibly one on the marked page as well */
+ if (ItemPointerIsValid(mark)) {
+ _hash_relbuf(scan->relation, opaque->hashso_mrkbuf, HASH_READ);
+ opaque->hashso_mrkbuf = InvalidBuffer;
+ ItemPointerSetInvalid(mark);
+ }
+
+ /* reset the scan key */
+ if (scan->numberOfKeys > 0) {
+ memmove(scan->keyData,
+ scankey,
+ scan->numberOfKeys * sizeof(ScanKeyData));
+ }
+}
+
+/*
+ * hashendscan() -- close down a scan
+ *
+ * Releases any buffers still locked for the current and marked
+ * positions, then deregisters the scan.
+ */
+void
+hashendscan(IndexScanDesc scan)
+{
+ HashScanOpaque opaque = (HashScanOpaque) scan->opaque;
+ ItemPointer current = &(scan->currentItemData);
+ ItemPointer mark = &(scan->currentMarkData);
+
+ /* release the read lock still held on the current position, if any */
+ if (ItemPointerIsValid(current)) {
+ _hash_relbuf(scan->relation, opaque->hashso_curbuf, HASH_READ);
+ opaque->hashso_curbuf = InvalidBuffer;
+ ItemPointerSetInvalid(current);
+ }
+
+ /* likewise for the marked position (its buffer may already be invalid) */
+ if (ItemPointerIsValid(mark)) {
+ if (BufferIsValid(opaque->hashso_mrkbuf))
+ _hash_relbuf(scan->relation, opaque->hashso_mrkbuf, HASH_READ);
+ opaque->hashso_mrkbuf = InvalidBuffer;
+ ItemPointerSetInvalid(mark);
+ }
+
+ /* don't need scan registered anymore */
+ _hash_dropscan(scan);
+
+ /* be tidy */
+#ifdef PERFECT_MMGR
+ pfree (scan->opaque);
+#endif /* PERFECT_MMGR */
+}
+
+/*
+ * hashmarkpos() -- save current scan position
+ *
+ * Copies currentItemData to currentMarkData and acquires an extra
+ * read-locked reference to the current page so the mark stays valid.
+ */
+void
+hashmarkpos(IndexScanDesc scan)
+{
+ HashScanOpaque opaque;
+ ItemPointer mark;
+
+ /* see if we ever call this code. if we do, then so_mrkbuf a
+ * useful element in the scan->opaque structure. if this procedure
+ * is never called, so_mrkbuf should be removed from the scan->opaque
+ * structure.
+ */
+ elog(NOTICE, "Hashmarkpos() called.");
+
+ opaque = (HashScanOpaque) scan->opaque;
+ mark = &(scan->currentMarkData);
+
+ /* release lock on old marked data, if any */
+ if (ItemPointerIsValid(mark)) {
+ _hash_relbuf(scan->relation, opaque->hashso_mrkbuf, HASH_READ);
+ opaque->hashso_mrkbuf = InvalidBuffer;
+ ItemPointerSetInvalid(mark);
+ }
+
+ /* bump lock on currentItemData and copy to currentMarkData */
+ if (ItemPointerIsValid(&(scan->currentItemData))) {
+ opaque->hashso_mrkbuf = _hash_getbuf(scan->relation,
+ BufferGetBlockNumber(opaque->hashso_curbuf),
+ HASH_READ);
+ scan->currentMarkData = scan->currentItemData;
+ }
+}
+
+/*
+ * hashrestrpos() -- restore scan to last saved position
+ *
+ * The mirror image of hashmarkpos(): releases the current position,
+ * then re-acquires the marked page and copies the mark back into
+ * currentItemData.
+ */
+void
+hashrestrpos(IndexScanDesc scan)
+{
+ HashScanOpaque opaque;
+ ItemPointer current;
+
+ /* see if we ever call this code. if we do, then so_mrkbuf a
+ * useful element in the scan->opaque structure. if this procedure
+ * is never called, so_mrkbuf should be removed from the scan->opaque
+ * structure.
+ */
+ elog(NOTICE, "Hashrestrpos() called.");
+
+ opaque = (HashScanOpaque) scan->opaque;
+ current = &(scan->currentItemData);
+
+ /* release lock on current data, if any */
+ if (ItemPointerIsValid(current)) {
+ _hash_relbuf(scan->relation, opaque->hashso_curbuf, HASH_READ);
+ opaque->hashso_curbuf = InvalidBuffer;
+ ItemPointerSetInvalid(current);
+ }
+
+ /* bump lock on currentMarkData and copy to currentItemData */
+ if (ItemPointerIsValid(&(scan->currentMarkData))) {
+ opaque->hashso_curbuf =
+ _hash_getbuf(scan->relation,
+ BufferGetBlockNumber(opaque->hashso_mrkbuf),
+ HASH_READ);
+ scan->currentItemData = scan->currentMarkData;
+ }
+}
+
+/* stubs */
+/*
+ * hashdelete() -- delete the index tuple at 'tid'.
+ */
+void
+hashdelete(Relation rel, ItemPointer tid)
+{
+ /* first tell any active scans that the tuple at 'tid' is going away */
+ _hash_adjscans(rel, tid);
+
+ /* then physically remove it from its page */
+ _hash_pagedel(rel, tid);
+}
+
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * hashfunc.c--
+ * Comparison functions for hash access method.
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $Header$
+ *
+ * NOTES
+ * These functions are stored in pg_amproc. For each operator class
+ * defined on hash tables, they compute the hash value of the argument.
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+#include "utils/nabstime.h"
+
+/* Hash an int16 key by taking its bitwise complement. */
+uint32 hashint2(int16 key)
+{
+ uint32 h;
+
+ h = (uint32) ~key;
+ return (h);
+}
+
+/* Hash a uint32 key by taking its bitwise complement. */
+uint32 hashint4(uint32 key)
+{
+ uint32 h = ~key;
+
+ return (h);
+}
+
+/* Hash function from Chris Torek. */
+/*
+ * hashfloat4() -- hash the bytes of a float32 value.
+ *
+ * The switch-inside-do loop is Duff's device: the first pass through
+ * the loop consumes (len % 8) bytes via the case-label fallthroughs,
+ * and each later pass consumes a full 8 bytes.
+ */
+uint32 hashfloat4(float32 keyp)
+{
+ int len;
+ int loop;
+ uint32 h;
+ char *kp = (char *) keyp;
+
+ len = sizeof(float32data);
+
+#define HASH4a h = (h << 5) - h + *kp++;
+#define HASH4b h = (h << 5) + h + *kp++;
+#define HASH4 HASH4b
+
+
+ h = 0;
+ if (len > 0) {
+ loop = (len + 8 - 1) >> 3;
+
+ switch (len & (8 - 1)) {
+ case 0:
+ do { /* All fall throughs */
+ HASH4;
+ case 7:
+ HASH4;
+ case 6:
+ HASH4;
+ case 5:
+ HASH4;
+ case 4:
+ HASH4;
+ case 3:
+ HASH4;
+ case 2:
+ HASH4;
+ case 1:
+ HASH4;
+ } while (--loop);
+ }
+ }
+ return (h);
+}
+
+
+/*
+ * hashfloat8() -- hash the bytes of a float64 value.
+ *
+ * Same Duff's-device byte loop as hashfloat4 above; the HASH4 macros
+ * are redefined here with identical bodies (a legal C redefinition).
+ */
+uint32 hashfloat8(float64 keyp)
+{
+ int len;
+ int loop;
+ uint32 h;
+ char *kp = (char *) keyp;
+
+ len = sizeof(float64data);
+
+#define HASH4a h = (h << 5) - h + *kp++;
+#define HASH4b h = (h << 5) + h + *kp++;
+#define HASH4 HASH4b
+
+
+ h = 0;
+ if (len > 0) {
+ loop = (len + 8 - 1) >> 3;
+
+ switch (len & (8 - 1)) {
+ case 0:
+ do { /* All fall throughs */
+ HASH4;
+ case 7:
+ HASH4;
+ case 6:
+ HASH4;
+ case 5:
+ HASH4;
+ case 4:
+ HASH4;
+ case 3:
+ HASH4;
+ case 2:
+ HASH4;
+ case 1:
+ HASH4;
+ } while (--loop);
+ }
+ }
+ return (h);
+}
+
+
+/* Hash an Oid key by taking its bitwise complement. */
+uint32 hashoid(Oid key)
+{
+ uint32 h;
+
+ h = (uint32) ~key;
+ return (h);
+}
+
+
+/*
+ * hashchar() -- hash a single character key.
+ *
+ * Maps the character into a small integer (offset from ' ') and
+ * reduces it modulo PRIME2.  The unused 'len' local of the original
+ * version has been removed; the PRIME1/PRIME2 constants stay defined
+ * here because the string-hashing functions below also use them.
+ */
+uint32 hashchar(char key)
+{
+ uint32 h;
+
+/* multiplier and modulus shared by the character hash functions below */
+#define PRIME1 37
+#define PRIME2 1048583
+
+ h = 0;
+ /* Convert char to integer */
+ h = h * PRIME1 ^ (key - ' ');
+ h %= PRIME2;
+
+ return (h);
+}
+
+/*
+ * hashchar2() -- hash the two bytes of a uint16 key, treating them
+ * as characters (PRIME1/PRIME2 are defined in hashchar above).
+ */
+uint32 hashchar2(uint16 intkey)
+{
+ char *kp = (char *) &intkey;
+ uint32 h = 0;
+ int nbytes = sizeof(uint16);
+
+ /* Convert string to integer */
+ while (nbytes--)
+ h = h * PRIME1 ^ (*kp++ - ' ');
+ h %= PRIME2;
+
+ return (h);
+}
+
+/*
+ * hashchar4() -- hash the four bytes of a uint32 key, treating them
+ * as characters.
+ */
+uint32 hashchar4(uint32 intkey)
+{
+ char *kp = (char *) &intkey;
+ uint32 h = 0;
+ int nbytes = sizeof(uint32);
+
+ /* Convert string to integer */
+ while (nbytes--)
+ h = h * PRIME1 ^ (*kp++ - ' ');
+ h %= PRIME2;
+
+ return (h);
+}
+
+/*
+ * hashchar8() -- hash a fixed-width char8 value; exactly
+ * sizeof(char8) bytes are folded in, with no NUL termination check.
+ */
+uint32 hashchar8(char *key)
+{
+ uint32 h = 0;
+ int i;
+
+ /* Convert string to integer */
+ for (i = 0; i < (int) sizeof(char8); i++)
+ h = h * PRIME1 ^ (key[i] - ' ');
+ h %= PRIME2;
+
+ return (h);
+}
+
+/*
+ * hashname() -- hash a NameData value; all NAMEDATALEN bytes are
+ * folded in, regardless of where the name's NUL terminator falls.
+ */
+uint32 hashname(NameData *n)
+{
+ char *cp = n->data;
+ uint32 h = 0;
+ int i;
+
+ /* Convert string to integer */
+ for (i = 0; i < NAMEDATALEN; i++)
+ h = h * PRIME1 ^ (cp[i] - ' ');
+ h %= PRIME2;
+
+ return (h);
+}
+
+
+/*
+ * hashchar16() -- hash a fixed-width char16 value; exactly
+ * sizeof(char16) bytes are folded in.
+ */
+uint32 hashchar16(char *key)
+{
+ uint32 h = 0;
+ int i;
+
+ /* Convert string to integer */
+ for (i = 0; i < (int) sizeof(char16); i++)
+ h = h * PRIME1 ^ (key[i] - ' ');
+ h %= PRIME2;
+
+ return (h);
+}
+
+
+/*
+ * (Comment from the original db3 hashing code: )
+ *
+ * "This is INCREDIBLY ugly, but fast. We break the string up into 8 byte
+ * units. On the first time through the loop we get the 'leftover bytes'
+ * (strlen % 8). On every other iteration, we perform 8 HASHC's so we handle
+ * all 8 bytes. Essentially, this saves us 7 cmp & branch instructions. If
+ * this routine is heavily used enough, it's worth the ugly coding.
+ *
+ * "OZ's original sdbm hash"
+ */
+/*
+ * hashtext() -- hash a variable-length text value.
+ */
+uint32 hashtext(struct varlena *key)
+{
+ int keylen;
+ char *keydata;
+ uint32 n;
+ int loop;
+
+ keydata = VARDATA(key);
+ keylen = VARSIZE(key);
+
+ /* keylen includes the four bytes in which string keylength is stored */
+ /* sizeof(VARSIZE(key)) is the size of the length word itself */
+ keylen -= sizeof(VARSIZE(key));
+
+#define HASHC n = *keydata++ + 65599 * n
+
+ n = 0;
+ if (keylen > 0) {
+ loop = (keylen + 8 - 1) >> 3;
+
+ /* Duff's device: first pass consumes keylen % 8 bytes, the rest 8 each */
+ switch (keylen & (8 - 1)) {
+ case 0:
+ do { /* All fall throughs */
+ HASHC;
+ case 7:
+ HASHC;
+ case 6:
+ HASHC;
+ case 5:
+ HASHC;
+ case 4:
+ HASHC;
+ case 3:
+ HASHC;
+ case 2:
+ HASHC;
+ case 1:
+ HASHC;
+ } while (--loop);
+ }
+ }
+ return (n);
+}
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * hashinsert.c--
+ * Item insertion in hash tables for Postgres.
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $Header$
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "storage/bufmgr.h"
+#include "storage/bufpage.h"
+
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/rel.h"
+#include "utils/excid.h"
+
+#include "access/heapam.h"
+#include "access/genam.h"
+#include "access/hash.h"
+
+static InsertIndexResult _hash_insertonpg(Relation rel, Buffer buf, int keysz, ScanKey scankey, HashItem hitem, Buffer metabuf);
+static OffsetNumber _hash_pgaddtup(Relation rel, Buffer buf, int keysz, ScanKey itup_scankey, Size itemsize, HashItem hitem);
+
+/*
+ * _hash_doinsert() -- Handle insertion of a single HashItem in the table.
+ *
+ * This routine is called by the public interface routines, hashbuild
+ * and hashinsert. By here, hashitem is filled in, and has a unique
+ * (xid, seqno) pair. The datum to be used as a "key" is in the
+ * hashitem.
+ *
+ * Returns the InsertIndexResult produced by _hash_insertonpg (which
+ * also releases the buffers this routine acquires).
+ */
+InsertIndexResult
+_hash_doinsert(Relation rel, HashItem hitem)
+{
+ Buffer buf;
+ Buffer metabuf;
+ BlockNumber blkno;
+ HashMetaPage metap;
+ IndexTuple itup;
+ InsertIndexResult res;
+ ScanKey itup_scankey;
+ int natts;
+ Page page;
+
+ /* read-lock the metapage for the duration of the bucket lookup */
+ metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);
+ metap = (HashMetaPage) BufferGetPage(metabuf);
+ _hash_checkpage((Page) metap, LH_META_PAGE);
+
+ /* we need a scan key to do our search, so build one */
+ itup = &(hitem->hash_itup);
+ /* hash indexes are single-column only; reject anything else */
+ if ((natts = rel->rd_rel->relnatts) != 1)
+ elog(WARN, "Hash indices valid for only one index key.");
+ itup_scankey = _hash_mkscankey(rel, itup, metap);
+
+ /*
+ * find the first page in the bucket chain containing this key and
+ * place it in buf. _hash_search obtains a read lock for us.
+ */
+ _hash_search(rel, natts, itup_scankey, &buf, metap);
+ page = BufferGetPage(buf);
+ _hash_checkpage(page, LH_BUCKET_PAGE);
+
+ /*
+ * trade in our read lock for a write lock so that we can do the
+ * insertion.
+ */
+ blkno = BufferGetBlockNumber(buf);
+ _hash_relbuf(rel, buf, HASH_READ);
+ buf = _hash_getbuf(rel, blkno, HASH_WRITE);
+
+
+ /*
+ * XXX btree comment (haven't decided what to do in hash): don't
+ * think the bucket can be split while we're reading the metapage.
+ *
+ * If the page was split between the time that we surrendered our
+ * read lock and acquired our write lock, then this page may no
+ * longer be the right place for the key we want to insert.
+ */
+
+ /* do the insertion (drops buf and metabuf before returning) */
+ res = _hash_insertonpg(rel, buf, natts, itup_scankey,
+ hitem, metabuf);
+
+ /* be tidy */
+ _hash_freeskey(itup_scankey);
+
+ return (res);
+}
+
+/*
+ * _hash_insertonpg() -- Insert a tuple on a particular page in the table.
+ *
+ * This recursive procedure does the following things:
+ *
+ * + if necessary, splits the target page.
+ * + inserts the tuple.
+ *
+ * On entry, we must have the right buffer on which to do the
+ * insertion, and the buffer must be pinned and locked. On return,
+ * we will have dropped both the pin and the write lock on the buffer.
+ *
+ * 'metabuf' arrives read-locked and is released before returning.
+ */
+static InsertIndexResult
+_hash_insertonpg(Relation rel,
+ Buffer buf,
+ int keysz,
+ ScanKey scankey,
+ HashItem hitem,
+ Buffer metabuf)
+{
+ InsertIndexResult res;
+ Page page;
+ BlockNumber itup_blkno;
+ OffsetNumber itup_off;
+ int itemsz;
+ HashPageOpaque pageopaque;
+ bool do_expand = false;
+ Buffer ovflbuf;
+ HashMetaPage metap;
+ Bucket bucket;
+
+ metap = (HashMetaPage) BufferGetPage(metabuf);
+ _hash_checkpage((Page) metap, LH_META_PAGE);
+
+ page = BufferGetPage(buf);
+ _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
+ pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
+ bucket = pageopaque->hasho_bucket;
+
+ /* on-page size of the item: index tuple plus HashItemData overhead */
+ itemsz = IndexTupleDSize(hitem->hash_itup)
+ + (sizeof(HashItemData) - sizeof(IndexTupleData));
+ itemsz = DOUBLEALIGN(itemsz);
+
+ /* walk the bucket chain until we find (or create) a page with room */
+ while (PageGetFreeSpace(page) < itemsz) {
+ /*
+ * no space on this page; check for an overflow page
+ */
+ if (BlockNumberIsValid(pageopaque->hasho_nextblkno)) {
+ /*
+ * ovfl page exists; go get it. if it doesn't have room,
+ * we'll find out next pass through the loop test above.
+ */
+ ovflbuf = _hash_getbuf(rel, pageopaque->hasho_nextblkno,
+ HASH_WRITE);
+ _hash_relbuf(rel, buf, HASH_WRITE);
+ buf = ovflbuf;
+ page = BufferGetPage(buf);
+ } else {
+ /*
+ * we're at the end of the bucket chain and we haven't
+ * found a page with enough room. allocate a new overflow
+ * page.
+ */
+ do_expand = true;
+ ovflbuf = _hash_addovflpage(rel, &metabuf, buf);
+ _hash_relbuf(rel, buf, HASH_WRITE);
+ buf = ovflbuf;
+ page = BufferGetPage(buf);
+
+ if (PageGetFreeSpace(page) < itemsz) {
+ /* it doesn't fit on an empty page -- give up */
+ elog(WARN, "hash item too large");
+ }
+ }
+ _hash_checkpage(page, LH_OVERFLOW_PAGE);
+ pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
+ Assert(pageopaque->hasho_bucket == bucket);
+ }
+
+ itup_off = _hash_pgaddtup(rel, buf, keysz, scankey, itemsz, hitem);
+ itup_blkno = BufferGetBlockNumber(buf);
+
+ /* by here, the new tuple is inserted */
+ res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData));
+
+ ItemPointerSet(&(res->pointerData), itup_blkno, itup_off);
+
+ /* NOTE(review): res was already dereferenced above, and palloc
+ * presumably elogs rather than returning NULL, so this check looks
+ * dead -- confirm before relying on it. */
+ if (res != NULL) {
+ /*
+ * Increment the number of keys in the table.
+ * We switch lock access type just for a moment
+ * to allow greater accessibility to the metapage.
+ */
+ metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf,
+ HASH_READ, HASH_WRITE);
+ metap->hashm_nkeys += 1;
+ metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf,
+ HASH_WRITE, HASH_READ);
+
+ }
+
+ _hash_wrtbuf(rel, buf);
+
+ /* split a bucket if the fill factor has been exceeded */
+ if (do_expand ||
+ (metap->hashm_nkeys / (metap->hashm_maxbucket + 1))
+ > metap->hashm_ffactor) {
+ _hash_expandtable(rel, metabuf);
+ }
+ _hash_relbuf(rel, metabuf, HASH_READ);
+ return (res);
+}
+
+/*
+ * _hash_pgaddtup() -- add a tuple to a particular page in the index.
+ *
+ * This routine adds the tuple to the page as requested, and keeps the
+ * write lock and reference associated with the page's buffer. It is
+ * an error to call pgaddtup() without a write lock and reference.
+ */
+static OffsetNumber
+_hash_pgaddtup(Relation rel,
+ Buffer buf,
+ int keysz,
+ ScanKey itup_scankey,
+ Size itemsize,
+ HashItem hitem)
+{
+ Page pg;
+ OffsetNumber off;
+
+ pg = BufferGetPage(buf);
+ _hash_checkpage(pg, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
+
+ /* new items always go at the end of the page */
+ off = OffsetNumberNext(PageGetMaxOffsetNumber(pg));
+ (void) PageAddItem(pg, (Item) hitem, itemsize, off, LP_USED);
+
+ /* write the buffer, but hold our lock */
+ _hash_wrtnorelbuf(rel, buf);
+
+ return (off);
+}
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * hashovfl.c--
+ * Overflow page management code for the Postgres hash access method
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $Header$
+ *
+ * NOTES
+ * Overflow pages look like ordinary relation pages.
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "storage/bufmgr.h"
+#include "storage/bufpage.h"
+
+#include "utils/elog.h"
+#include "utils/rel.h"
+#include "utils/excid.h"
+
+#include "access/genam.h"
+#include "access/hash.h"
+
+static OverflowPageAddress _hash_getovfladdr(Relation rel, Buffer *metabufp);
+static uint32 _hash_firstfreebit(uint32 map);
+
+/*
+ * _hash_addovflpage
+ *
+ * Add an overflow page to the page currently pointed to by the buffer
+ * argument 'buf'.
+ *
+ * *Metabufp has a read lock upon entering the function; buf has a
+ * write lock.
+ *
+ * Returns the new overflow page's buffer, write-locked; 'buf' keeps
+ * its write lock and now links forward to the new page.
+ */
+Buffer
+_hash_addovflpage(Relation rel, Buffer *metabufp, Buffer buf)
+{
+
+ OverflowPageAddress oaddr;
+ BlockNumber ovflblkno;
+ Buffer ovflbuf;
+ HashMetaPage metap;
+ HashPageOpaque ovflopaque;
+ HashPageOpaque pageopaque;
+ Page page;
+ Page ovflpage;
+
+ /* this had better be the last page in a bucket chain */
+ page = BufferGetPage(buf);
+ _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
+ pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
+ Assert(!BlockNumberIsValid(pageopaque->hasho_nextblkno));
+
+ metap = (HashMetaPage) BufferGetPage(*metabufp);
+ _hash_checkpage((Page) metap, LH_META_PAGE);
+
+ /* allocate an empty overflow page */
+ oaddr = _hash_getovfladdr(rel, metabufp);
+ if (oaddr == InvalidOvflAddress) {
+ elog(WARN, "_hash_addovflpage: problem with _hash_getovfladdr.");
+ }
+ ovflblkno = OADDR_TO_BLKNO(OADDR_OF(SPLITNUM(oaddr), OPAGENUM(oaddr)));
+ Assert(BlockNumberIsValid(ovflblkno));
+ ovflbuf = _hash_getbuf(rel, ovflblkno, HASH_WRITE);
+ Assert(BufferIsValid(ovflbuf));
+ ovflpage = BufferGetPage(ovflbuf);
+
+ /* initialize the new overflow page: back-link it to 'buf' */
+ _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf));
+ ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage);
+ ovflopaque->hasho_prevblkno = BufferGetBlockNumber(buf);
+ ovflopaque->hasho_nextblkno = InvalidBlockNumber;
+ ovflopaque->hasho_flag = LH_OVERFLOW_PAGE;
+ ovflopaque->hasho_oaddr = oaddr;
+ ovflopaque->hasho_bucket = pageopaque->hasho_bucket;
+ /* write the new page first, then link it, to keep the chain sane */
+ _hash_wrtnorelbuf(rel, ovflbuf);
+
+ /* logically chain overflow page to previous page */
+ pageopaque->hasho_nextblkno = ovflblkno;
+ _hash_wrtnorelbuf(rel, buf);
+ return (ovflbuf);
+}
+
+/*
+ * _hash_getovfladdr()
+ *
+ * Find an available overflow page and return its address.
+ *
+ * When we enter this function, we have a read lock on *metabufp which
+ * we change to a write lock immediately. Before exiting, the write lock
+ * is exchanged for a read lock.
+ *
+ * Searches the free-space bitmap pages for a clear bit; if none is
+ * found, extends the table (allocating a fresh bitmap page when the
+ * current one is exhausted).
+ */
+static OverflowPageAddress
+_hash_getovfladdr(Relation rel, Buffer *metabufp)
+{
+ HashMetaPage metap;
+ Buffer mapbuf;
+ BlockNumber blkno;
+ PageOffset offset;
+ OverflowPageAddress oaddr;
+ SplitNumber splitnum;
+ uint32 *freep;
+ uint32 max_free;
+ uint32 bit;
+ uint32 first_page;
+ uint32 free_bit;
+ uint32 free_page;
+ uint32 in_use_bits;
+ uint32 i, j;
+
+ metap = (HashMetaPage) _hash_chgbufaccess(rel, metabufp, HASH_READ, HASH_WRITE);
+
+ splitnum = metap->OVFL_POINT;
+ max_free = metap->SPARES[splitnum];
+
+ /* bitmap page index / bit index of the highest allocated ovfl page */
+ free_page = (max_free - 1) >> (metap->BSHIFT + BYTE_TO_BIT);
+ free_bit = (max_free - 1) & (BMPGSZ_BIT(metap) - 1);
+
+ /* Look through all the free maps to find the first free block */
+ first_page = metap->LAST_FREED >> (metap->BSHIFT + BYTE_TO_BIT);
+ for ( i = first_page; i <= free_page; i++ ) {
+ Page mappage;
+
+ blkno = metap->hashm_mapp[i];
+ mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE);
+ mappage = BufferGetPage(mapbuf);
+ _hash_checkpage(mappage, LH_BITMAP_PAGE);
+ freep = HashPageGetBitmap(mappage);
+ Assert(freep);
+
+ if (i == free_page)
+ in_use_bits = free_bit;
+ else
+ in_use_bits = BMPGSZ_BIT(metap) - 1;
+
+ if (i == first_page) {
+ /* resume the scan at the last-freed bit, rounded to a word */
+ bit = metap->LAST_FREED & (BMPGSZ_BIT(metap) - 1);
+ j = bit / BITS_PER_MAP;
+ bit = bit & ~(BITS_PER_MAP - 1);
+ } else {
+ bit = 0;
+ j = 0;
+ }
+ /* scan one bitmap word at a time for a word with a clear bit */
+ for (; bit <= in_use_bits; j++, bit += BITS_PER_MAP)
+ if (freep[j] != ALL_SET)
+ goto found;
+ }
+
+ /* No Free Page Found - have to allocate a new page */
+ metap->LAST_FREED = metap->SPARES[splitnum];
+ metap->SPARES[splitnum]++;
+ offset = metap->SPARES[splitnum] -
+ (splitnum ? metap->SPARES[splitnum - 1] : 0);
+
+#define OVMSG "HASH: Out of overflow pages. Out of luck.\n"
+
+ if (offset > SPLITMASK) {
+ if (++splitnum >= NCACHED) {
+ elog(WARN, OVMSG);
+ }
+ metap->OVFL_POINT = splitnum;
+ metap->SPARES[splitnum] = metap->SPARES[splitnum-1];
+ metap->SPARES[splitnum-1]--;
+ offset = 0;
+ }
+
+ /* Check if we need to allocate a new bitmap page */
+ if (free_bit == BMPGSZ_BIT(metap) - 1) {
+ /* won't be needing old map page */
+
+ _hash_relbuf(rel, mapbuf, HASH_WRITE);
+
+ free_page++;
+ if (free_page >= NCACHED) {
+ elog(WARN, OVMSG);
+ }
+
+ /*
+ * This is tricky. The 1 indicates that you want the new page
+ * allocated with 1 clear bit. Actually, you are going to
+ * allocate 2 pages from this map. The first is going to be
+ * the map page, the second is the overflow page we were
+ * looking for. The init_bitmap routine automatically, sets
+ * the first bit of itself to indicate that the bitmap itself
+ * is in use. We would explicitly set the second bit, but
+ * don't have to if we tell init_bitmap not to leave it clear
+ * in the first place.
+ */
+ if (_hash_initbitmap(rel, metap, OADDR_OF(splitnum, offset),
+ 1, free_page)) {
+ elog(WARN, "overflow_page: problem with _hash_initbitmap.");
+ }
+ metap->SPARES[splitnum]++;
+ offset++;
+ if (offset > SPLITMASK) {
+ if (++splitnum >= NCACHED) {
+ elog(WARN, OVMSG);
+ }
+ metap->OVFL_POINT = splitnum;
+ metap->SPARES[splitnum] = metap->SPARES[splitnum-1];
+ metap->SPARES[splitnum-1]--;
+ offset = 0;
+ }
+ } else {
+
+ /*
+ * Free_bit addresses the last used bit. Bump it to address
+ * the first available bit.
+ */
+ free_bit++;
+ SETBIT(freep, free_bit);
+ _hash_wrtbuf(rel, mapbuf);
+ }
+
+ /* Calculate address of the new overflow page */
+ oaddr = OADDR_OF(splitnum, offset);
+ _hash_chgbufaccess(rel, metabufp, HASH_WRITE, HASH_READ);
+ return (oaddr);
+
+ found:
+ bit = bit + _hash_firstfreebit(freep[j]);
+ SETBIT(freep, bit);
+ _hash_wrtbuf(rel, mapbuf);
+
+ /*
+ * Bits are addressed starting with 0, but overflow pages are addressed
+ * beginning at 1. Bit is a bit address number, so we need to increment
+ * it to convert it to a page number.
+ */
+
+ bit = 1 + bit + (i * BMPGSZ_BIT(metap));
+ if (bit >= metap->LAST_FREED) {
+ metap->LAST_FREED = bit - 1;
+ }
+
+ /* Calculate the split number for this page */
+ for (i = 0; (i < splitnum) && (bit > metap->SPARES[i]); i++)
+ ;
+ offset = (i ? bit - metap->SPARES[i - 1] : bit);
+ if (offset >= SPLITMASK) {
+ elog(WARN, OVMSG);
+ }
+
+ /* initialize this page */
+ oaddr = OADDR_OF(i, offset);
+ _hash_chgbufaccess(rel, metabufp, HASH_WRITE, HASH_READ);
+ return (oaddr);
+}
+
+/*
+ * _hash_firstfreebit()
+ *
+ * Return the first bit that is not set in the argument 'map'. This
+ * function is used to find an available overflow page within a
+ * splitnumber.  If every bit is set, BITS_PER_MAP is returned.
+ */
+static uint32
+_hash_firstfreebit(uint32 map)
+{
+ uint32 bitno;
+
+ for (bitno = 0; bitno < BITS_PER_MAP; bitno++) {
+  if ((map & ((uint32) 0x1 << bitno)) == 0)
+   return (bitno);
+ }
+ return (bitno);
+}
+
/*
 * _hash_freeovflpage() -
 *
 *	Mark this overflow page as free and return a buffer with
 *	the page that follows it (which may be defined as
 *	InvalidBuffer).
 *
 *	On entry, 'ovflbuf' holds a write-locked overflow page.  The page
 *	is zeroed and written back, unlinked from its bucket's doubly-
 *	linked page chain, and its bit is cleared in the overflow-page
 *	bitmap so the page can be reused later.
 */
Buffer
_hash_freeovflpage(Relation rel, Buffer ovflbuf)
{
	HashMetaPage metap;
	Buffer metabuf;
	Buffer mapbuf;
	BlockNumber prevblkno;
	BlockNumber blkno;
	BlockNumber nextblkno;
	HashPageOpaque ovflopaque;
	Page ovflpage;
	Page mappage;
	OverflowPageAddress addr;
	SplitNumber splitnum;
	uint32 *freep;
	uint32 ovflpgno;
	int32 bitmappage, bitmapbit;
	Bucket bucket;

	metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE);
	metap = (HashMetaPage) BufferGetPage(metabuf);
	_hash_checkpage((Page) metap, LH_META_PAGE);

	/*
	 * capture the page's overflow address, chain links and bucket
	 * number before zeroing the page and writing it out.
	 */
	ovflpage = BufferGetPage(ovflbuf);
	_hash_checkpage(ovflpage, LH_OVERFLOW_PAGE);
	ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage);
	addr = ovflopaque->hasho_oaddr;
	nextblkno = ovflopaque->hasho_nextblkno;
	prevblkno = ovflopaque->hasho_prevblkno;
	bucket = ovflopaque->hasho_bucket;
	(void) memset(ovflpage, 0, BufferGetPageSize(ovflbuf));
	_hash_wrtbuf(rel, ovflbuf);

	/*
	 * fix up the bucket chain.  this is a doubly-linked list, so we
	 * must fix up the bucket chain members behind and ahead of the
	 * overflow page being deleted.
	 *
	 * XXX this should look like:
	 *	- lock prev/next
	 *	- modify/write prev/next (how to do write ordering with a
	 *	  doubly-linked list???)
	 *	- unlock prev/next
	 */
	if (BlockNumberIsValid(prevblkno)) {
		/* predecessor may be the base bucket page or another overflow page */
		Buffer prevbuf = _hash_getbuf(rel, prevblkno, HASH_WRITE);
		Page prevpage = BufferGetPage(prevbuf);
		HashPageOpaque prevopaque =
			(HashPageOpaque) PageGetSpecialPointer(prevpage);

		_hash_checkpage(prevpage, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
		Assert(prevopaque->hasho_bucket == bucket);
		prevopaque->hasho_nextblkno = nextblkno;
		_hash_wrtbuf(rel, prevbuf);
	}
	if (BlockNumberIsValid(nextblkno)) {
		/* successor, if any, must be an overflow page */
		Buffer nextbuf = _hash_getbuf(rel, nextblkno, HASH_WRITE);
		Page nextpage = BufferGetPage(nextbuf);
		HashPageOpaque nextopaque =
			(HashPageOpaque) PageGetSpecialPointer(nextpage);

		_hash_checkpage(nextpage, LH_OVERFLOW_PAGE);
		Assert(nextopaque->hasho_bucket == bucket);
		nextopaque->hasho_prevblkno = prevblkno;
		_hash_wrtbuf(rel, nextbuf);
	}

	/*
	 * Fix up the overflow page bitmap that tracks this particular
	 * overflow page.  The bitmap can be found in the MetaPageData
	 * array element hashm_mapp[bitmappage].
	 */
	splitnum = (addr >> SPLITSHIFT);
	/* convert the overflow address into a linear overflow page number */
	ovflpgno =
		(splitnum ? metap->SPARES[splitnum - 1] : 0) + (addr & SPLITMASK) - 1;

	/* track the lowest-numbered freed page to speed later allocations */
	if (ovflpgno < metap->LAST_FREED) {
		metap->LAST_FREED = ovflpgno;
	}

	/* locate the bitmap page, and the bit within it, for this page */
	bitmappage = (ovflpgno >> (metap->BSHIFT + BYTE_TO_BIT));
	bitmapbit = ovflpgno & (BMPGSZ_BIT(metap) - 1);

	blkno = metap->hashm_mapp[bitmappage];
	mapbuf = _hash_getbuf(rel, blkno, HASH_WRITE);
	mappage = BufferGetPage(mapbuf);
	_hash_checkpage(mappage, LH_BITMAP_PAGE);
	freep = HashPageGetBitmap(mappage);
	/* a clear bit means the overflow page is free for reuse */
	CLRBIT(freep, bitmapbit);
	_hash_wrtbuf(rel, mapbuf);

	_hash_relbuf(rel, metabuf, HASH_WRITE);

	/*
	 * now instantiate the page that replaced this one,
	 * if it exists, and return that buffer with a write lock.
	 */
	if (BlockNumberIsValid(nextblkno)) {
		return (_hash_getbuf(rel, nextblkno, HASH_WRITE));
	} else {
		return (InvalidBuffer);
	}
}
+
+
+/*
+ * _hash_initbitmap()
+ *
+ * Initialize a new bitmap page. The metapage has a write-lock upon
+ * entering the function.
+ *
+ * 'pnum' is the OverflowPageAddress of the new bitmap page.
+ * 'nbits' is how many bits to clear (i.e., make available) in the new
+ * bitmap page. the remainder of the bits (as well as the first bit,
+ * representing the bitmap page itself) will be set.
+ * 'ndx' is the 0-based offset of the new bitmap page within the
+ * metapage's array of bitmap page OverflowPageAddresses.
+ */
+
+#define INT_MASK ((1 << INT_TO_BIT) -1)
+
+int32
+_hash_initbitmap(Relation rel,
+ HashMetaPage metap,
+ int32 pnum,
+ int32 nbits,
+ int32 ndx)
+{
+ Buffer buf;
+ BlockNumber blkno;
+ Page pg;
+ HashPageOpaque op;
+ uint32 *freep;
+ int clearbytes, clearints;
+
+ blkno = OADDR_TO_BLKNO(pnum);
+ buf = _hash_getbuf(rel, blkno, HASH_WRITE);
+ pg = BufferGetPage(buf);
+ _hash_pageinit(pg, BufferGetPageSize(buf));
+ op = (HashPageOpaque) PageGetSpecialPointer(pg);
+ op->hasho_oaddr = InvalidOvflAddress;
+ op->hasho_prevblkno = InvalidBlockNumber;
+ op->hasho_nextblkno = InvalidBlockNumber;
+ op->hasho_flag = LH_BITMAP_PAGE;
+ op->hasho_bucket = -1;
+
+ freep = HashPageGetBitmap(pg);
+
+ /* set all of the bits above 'nbits' to 1 */
+ clearints = ((nbits - 1) >> INT_TO_BIT) + 1;
+ clearbytes = clearints << INT_TO_BYTE;
+ (void) memset((char *) freep, 0, clearbytes);
+ (void) memset(((char *) freep) + clearbytes, 0xFF,
+ BMPGSZ_BYTE(metap) - clearbytes);
+ freep[clearints - 1] = ALL_SET << (nbits & INT_MASK);
+
+ /* bit 0 represents the new bitmap page */
+ SETBIT(freep, 0);
+
+ /* metapage already has a write lock */
+ metap->hashm_nmaps++;
+ metap->hashm_mapp[ndx] = blkno;
+
+ /* write out the new bitmap page (releasing its locks) */
+ _hash_wrtbuf(rel, buf);
+
+ return (0);
+}
+
+
/*
 * _hash_squeezebucket(rel, bucket)
 *
 *	Try to squeeze the tuples onto pages occurring earlier in the
 *	bucket chain in an attempt to free overflow pages.  When we start
 *	the "squeezing", the page from which we start taking tuples (the
 *	"read" page) is the last bucket in the bucket chain and the page
 *	onto which we start squeezing tuples (the "write" page) is the
 *	first page in the bucket chain.  The read page works backward and
 *	the write page works forward; the procedure terminates when the
 *	read page and write page are the same page.
 */
void
_hash_squeezebucket(Relation rel,
		    HashMetaPage metap,
		    Bucket bucket)
{
	Buffer wbuf;
	Buffer rbuf;
	BlockNumber wblkno;
	BlockNumber rblkno;
	Page wpage;
	Page rpage;
	HashPageOpaque wopaque;
	HashPageOpaque ropaque;
	OffsetNumber woffnum;
	OffsetNumber roffnum;
	HashItem hitem;
	int itemsz;

/*	elog(DEBUG, "_hash_squeezebucket: squeezing bucket %d", bucket); */

	/*
	 * start squeezing into the base bucket page.
	 */
	wblkno = BUCKET_TO_BLKNO(bucket);
	wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE);
	wpage = BufferGetPage(wbuf);
	_hash_checkpage(wpage, LH_BUCKET_PAGE);
	wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);

	/*
	 * if there aren't any overflow pages, there's nothing to squeeze.
	 */
	if (!BlockNumberIsValid(wopaque->hasho_nextblkno)) {
		_hash_relbuf(rel, wbuf, HASH_WRITE);
		return;
	}

	/*
	 * find the last page in the bucket chain by starting at the base
	 * bucket page and working forward.
	 *
	 * XXX if chains tend to be long, we should probably move forward
	 * using HASH_READ and then _hash_chgbufaccess to HASH_WRITE when
	 * we reach the end.  if they are short we probably don't care
	 * very much.  if the hash function is working at all, they had
	 * better be short..
	 */
	ropaque = wopaque;
	do {
		rblkno = ropaque->hasho_nextblkno;
		/* release the previous read page, except on the first
		 * iteration (where ropaque still aliases wopaque and rbuf
		 * is not yet valid) */
		if (ropaque != wopaque) {
			_hash_relbuf(rel, rbuf, HASH_WRITE);
		}
		rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE);
		rpage = BufferGetPage(rbuf);
		_hash_checkpage(rpage, LH_OVERFLOW_PAGE);
		Assert(!PageIsEmpty(rpage));
		ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
		Assert(ropaque->hasho_bucket == bucket);
	} while (BlockNumberIsValid(ropaque->hasho_nextblkno));

	/*
	 * squeeze the tuples.  each iteration moves one tuple from the
	 * read page to the first write page that has room for it.
	 */
	roffnum = FirstOffsetNumber;
	for(;;) {
		hitem = (HashItem) PageGetItem(rpage, PageGetItemId(rpage, roffnum));
		/* on-page size of the tuple, including HashItemData overhead */
		itemsz = IndexTupleDSize(hitem->hash_itup)
			+ (sizeof(HashItemData) - sizeof(IndexTupleData));
		itemsz = DOUBLEALIGN(itemsz);

		/*
		 * walk up the bucket chain, looking for a page big enough for
		 * this item.
		 */
		while (PageGetFreeSpace(wpage) < itemsz) {
			wblkno = wopaque->hasho_nextblkno;

			_hash_wrtbuf(rel, wbuf);

			/* if the write cursor caught up with the read cursor,
			 * the bucket is fully packed -- we're done */
			if (!BlockNumberIsValid(wblkno) || (rblkno == wblkno)) {
				_hash_wrtbuf(rel, rbuf);
				/* wbuf is already released */
				return;
			}

			wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE);
			wpage = BufferGetPage(wbuf);
			_hash_checkpage(wpage, LH_OVERFLOW_PAGE);
			Assert(!PageIsEmpty(wpage));
			wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);
			Assert(wopaque->hasho_bucket == bucket);
		}

		/*
		 * if we're here, we have found room so insert on the "write"
		 * page.
		 */
		woffnum = OffsetNumberNext(PageGetMaxOffsetNumber(wpage));
		(void) PageAddItem(wpage, (Item) hitem, itemsz, woffnum, LP_USED);

		/*
		 * delete the tuple from the "read" page.
		 * PageIndexTupleDelete repacks the ItemId array, so 'roffnum'
		 * will be "advanced" to the "next" ItemId.
		 */
		PageIndexTupleDelete(rpage, roffnum);
		_hash_wrtnorelbuf(rel, rbuf);

		/*
		 * if the "read" page is now empty because of the deletion,
		 * free it.
		 */
		if (PageIsEmpty(rpage) && (ropaque->hasho_flag & LH_OVERFLOW_PAGE)) {
			/* remember the predecessor; it becomes the new read page */
			rblkno = ropaque->hasho_prevblkno;
			Assert(BlockNumberIsValid(rblkno));

			/*
			 * free this overflow page.  the extra _hash_relbuf is
			 * because _hash_freeovflpage gratuitously returns the
			 * next page (we want the previous page and will get it
			 * ourselves later).
			 */
			rbuf = _hash_freeovflpage(rel, rbuf);
			if (BufferIsValid(rbuf)) {
				_hash_relbuf(rel, rbuf, HASH_WRITE);
			}

			/* read cursor has backed into the write page: done */
			if (rblkno == wblkno) {
				/* rbuf is already released */
				_hash_wrtbuf(rel, wbuf);
				return;
			}

			rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE);
			rpage = BufferGetPage(rbuf);
			_hash_checkpage(rpage, LH_OVERFLOW_PAGE);
			Assert(!PageIsEmpty(rpage));
			ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
			Assert(ropaque->hasho_bucket == bucket);

			roffnum = FirstOffsetNumber;
		}
	}
}
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * hashpage.c--
+ * Hash table page management code for the Postgres hash access method
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $Header$
+ *
+ * NOTES
+ * Postgres hash pages look like ordinary relation pages. The opaque
+ * data at high addresses includes information about the page including
+ * whether a page is an overflow page or a true bucket, the block
+ * numbers of the preceding and following pages, and the overflow
+ * address of the page if it is an overflow page.
+ *
+ * The first page in a hash relation, page zero, is special -- it stores
 * information describing the hash table; it is referred to as the
+ * "meta page." Pages one and higher store the actual data.
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "storage/bufmgr.h"
+#include "storage/bufpage.h"
+
+#include "utils/elog.h"
+#include "utils/rel.h"
+#include "utils/excid.h"
+
+#include "access/genam.h"
+#include "access/hash.h"
+
+static void _hash_setpagelock(Relation rel, BlockNumber blkno, int access);
+static void _hash_unsetpagelock(Relation rel, BlockNumber blkno, int access);
+static void _hash_splitpage(Relation rel, Buffer metabuf, Bucket obucket, Bucket nbucket);
+
+/*
+ * We use high-concurrency locking on hash indices. There are two cases in
+ * which we don't do locking. One is when we're building the index.
+ * Since the creating transaction has not committed, no one can see
+ * the index, and there's no reason to share locks. The second case
+ * is when we're just starting up the database system. We use some
+ * special-purpose initialization code in the relation cache manager
+ * (see utils/cache/relcache.c) to allow us to do indexed scans on
+ * the system catalogs before we'd normally be able to. This happens
+ * before the lock table is fully initialized, so we can't use it.
+ * Strictly speaking, this violates 2pl, but we don't do 2pl on the
+ * system catalogs anyway.
+ */
+
+
+#define USELOCKING (!BuildingHash && !IsInitProcessingMode())
+
+
/*
 * _hash_metapinit() -- Initialize the metadata page of a hash index,
 *		the two buckets that we begin with and the initial
 *		bitmap page.
 *
 *	The relation must be completely empty on entry.  On exit, pages
 *	0 (meta page), 1 and 2 (the two initial buckets) and 3 (the first
 *	bitmap page) have been written out.
 */
void
_hash_metapinit(Relation rel)
{
	HashMetaPage metap;
	HashPageOpaque pageopaque;
	Buffer metabuf;
	Buffer buf;
	Page pg;
	int nbuckets;
	uint32 nelem;			/* number elements */
	uint32 lg2nelem;		/* _hash_log2(nelem) */
	uint32 nblocks;
	uint16 i;

	/* can't be sharing this with anyone, now... */
	if (USELOCKING)
		RelationSetLockForWrite(rel);

	/* refuse to clobber an index that already contains pages */
	if ((nblocks = RelationGetNumberOfBlocks(rel)) != 0) {
		elog(WARN, "Cannot initialize non-empty hash table %s",
		     RelationGetRelationName(rel));
	}

	metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE);
	pg = BufferGetPage(metabuf);
	metap = (HashMetaPage) pg;
	_hash_pageinit(pg, BufferGetPageSize(metabuf));

	metap->hashm_magic = HASH_MAGIC;
	metap->hashm_version = HASH_VERSION;
	metap->hashm_nkeys = 0;
	metap->hashm_nmaps = 0;
	metap->hashm_ffactor = DEFAULT_FFACTOR;
	metap->hashm_bsize = BufferGetPageSize(metabuf);
	metap->hashm_bshift = _hash_log2(metap->hashm_bsize);
	/*
	 * choose the bitmap size: the largest power of 2 that still fits
	 * on a page after the page header and opaque data are accounted
	 * for.  search downward from the full page size.
	 */
	for (i = metap->hashm_bshift; i > 0; --i) {
		if ((1 << i) < (metap->hashm_bsize -
				(DOUBLEALIGN(sizeof(PageHeaderData)) +
				 DOUBLEALIGN(sizeof(HashPageOpaqueData))))) {
			break;
		}
	}
	Assert(i);
	metap->hashm_bmsize = 1 << i;
	metap->hashm_procid = index_getprocid(rel, 1, HASHPROC);

	/*
	 * Make nelem = 2 rather than 0 so that we end up allocating space
	 * for the next greater power of two number of buckets.
	 */
	nelem = 2;
	lg2nelem = 1;		/*_hash_log2(MAX(nelem, 2)) */
	nbuckets = 2;		/*1 << lg2nelem */

	memset((char *) metap->hashm_spares, 0, sizeof(metap->hashm_spares));
	memset((char *) metap->hashm_mapp, 0, sizeof(metap->hashm_mapp));

	/* hard-coded values below are the constant-folded forms noted
	 * in the trailing comments */
	metap->hashm_spares[lg2nelem] = 2;	/* lg2nelem + 1 */
	metap->hashm_spares[lg2nelem + 1] = 2;	/* lg2nelem + 1 */
	metap->hashm_ovflpoint = 1;		/* lg2nelem */
	metap->hashm_lastfreed = 2;

	metap->hashm_maxbucket = metap->hashm_lowmask = 1;	/* nbuckets - 1 */
	metap->hashm_highmask = 3;	/* (nbuckets << 1) - 1 */

	pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
	pageopaque->hasho_oaddr = InvalidOvflAddress;
	pageopaque->hasho_prevblkno = InvalidBlockNumber;
	pageopaque->hasho_nextblkno = InvalidBlockNumber;
	pageopaque->hasho_flag = LH_META_PAGE;
	pageopaque->hasho_bucket = -1;

	/*
	 * First bitmap page is at: splitpoint lg2nelem page offset 1 which
	 * turns out to be page 3.  Couldn't initialize page 3 until we created
	 * the first two buckets above.
	 */
	if (_hash_initbitmap(rel, metap, OADDR_OF(lg2nelem, 1), lg2nelem + 1, 0))
		elog(WARN, "Problem with _hash_initbitmap.");

	/* all done */
	_hash_wrtnorelbuf(rel, metabuf);

	/*
	 * initialize the first two buckets
	 */
	for (i = 0; i <= 1; i++) {
		buf = _hash_getbuf(rel, BUCKET_TO_BLKNO(i), HASH_WRITE);
		pg = BufferGetPage(buf);
		_hash_pageinit(pg, BufferGetPageSize(buf));
		pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
		pageopaque->hasho_oaddr = InvalidOvflAddress;
		pageopaque->hasho_prevblkno = InvalidBlockNumber;
		pageopaque->hasho_nextblkno = InvalidBlockNumber;
		pageopaque->hasho_flag = LH_BUCKET_PAGE;
		pageopaque->hasho_bucket = i;
		_hash_wrtbuf(rel, buf);
	}

	_hash_relbuf(rel, metabuf, HASH_WRITE);

	if (USELOCKING)
		RelationUnsetLockForWrite(rel);
}
+
+/*
+ * _hash_getbuf() -- Get a buffer by block number for read or write.
+ *
+ * When this routine returns, the appropriate lock is set on the
+ * requested buffer its reference count is correct.
+ *
+ * XXX P_NEW is not used because, unlike the tree structures, we
+ * need the bucket blocks to be at certain block numbers. we must
+ * depend on the caller to call _hash_pageinit on the block if it
+ * knows that this is a new block.
+ */
+Buffer
+_hash_getbuf(Relation rel, BlockNumber blkno, int access)
+{
+ Buffer buf;
+
+ if (blkno == P_NEW) {
+ elog(WARN, "_hash_getbuf: internal error: hash AM does not use P_NEW");
+ }
+ switch (access) {
+ case HASH_WRITE:
+ case HASH_READ:
+ _hash_setpagelock(rel, blkno, access);
+ break;
+ default:
+ elog(WARN, "_hash_getbuf: invalid access (%d) on new blk: %.*s",
+ access, NAMEDATALEN, RelationGetRelationName(rel));
+ break;
+ }
+ buf = ReadBuffer(rel, blkno);
+
+ /* ref count and lock type are correct */
+ return (buf);
+}
+
+/*
+ * _hash_relbuf() -- release a locked buffer.
+ */
+void
+_hash_relbuf(Relation rel, Buffer buf, int access)
+{
+ BlockNumber blkno;
+
+ blkno = BufferGetBlockNumber(buf);
+
+ switch (access) {
+ case HASH_WRITE:
+ case HASH_READ:
+ _hash_unsetpagelock(rel, blkno, access);
+ break;
+ default:
+ elog(WARN, "_hash_relbuf: invalid access (%d) on blk %x: %.*s",
+ access, blkno, NAMEDATALEN, RelationGetRelationName(rel));
+ }
+
+ ReleaseBuffer(buf);
+}
+
+/*
+ * _hash_wrtbuf() -- write a hash page to disk.
+ *
+ * This routine releases the lock held on the buffer and our reference
+ * to it. It is an error to call _hash_wrtbuf() without a write lock
+ * or a reference to the buffer.
+ */
+void
+_hash_wrtbuf(Relation rel, Buffer buf)
+{
+ BlockNumber blkno;
+
+ blkno = BufferGetBlockNumber(buf);
+ WriteBuffer(buf);
+ _hash_unsetpagelock(rel, blkno, HASH_WRITE);
+}
+
+/*
+ * _hash_wrtnorelbuf() -- write a hash page to disk, but do not release
+ * our reference or lock.
+ *
+ * It is an error to call _hash_wrtnorelbuf() without a write lock
+ * or a reference to the buffer.
+ */
+void
+_hash_wrtnorelbuf(Relation rel, Buffer buf)
+{
+ BlockNumber blkno;
+
+ blkno = BufferGetBlockNumber(buf);
+ WriteNoReleaseBuffer(buf);
+}
+
+Page
+_hash_chgbufaccess(Relation rel,
+ Buffer *bufp,
+ int from_access,
+ int to_access)
+{
+ BlockNumber blkno;
+
+ blkno = BufferGetBlockNumber(*bufp);
+
+ switch (from_access) {
+ case HASH_WRITE:
+ _hash_wrtbuf(rel, *bufp);
+ break;
+ case HASH_READ:
+ _hash_relbuf(rel, *bufp, from_access);
+ break;
+ default:
+ elog(WARN, "_hash_chgbufaccess: invalid access (%d) on blk %x: %.*s",
+ from_access, blkno, NAMEDATALEN, RelationGetRelationName(rel));
+ break;
+ }
+ *bufp = _hash_getbuf(rel, blkno, to_access);
+ return (BufferGetPage(*bufp));
+}
+
+/*
+ * _hash_pageinit() -- Initialize a new page.
+ */
+void
+_hash_pageinit(Page page, Size size)
+{
+ Assert(((PageHeader) page)->pd_lower == 0);
+ Assert(((PageHeader) page)->pd_upper == 0);
+ Assert(((PageHeader) page)->pd_special == 0);
+
+ /*
+ * Cargo-cult programming -- don't really need this to be zero, but
+ * creating new pages is an infrequent occurrence and it makes me feel
+ * good when I know they're empty.
+ */
+ memset(page, 0, size);
+
+ PageInit(page, size, sizeof(HashPageOpaqueData));
+}
+
+static void
+_hash_setpagelock(Relation rel,
+ BlockNumber blkno,
+ int access)
+{
+ ItemPointerData iptr;
+
+ if (USELOCKING) {
+ ItemPointerSet(&iptr, blkno, 1);
+
+ switch (access) {
+ case HASH_WRITE:
+ RelationSetSingleWLockPage(rel, &iptr);
+ break;
+ case HASH_READ:
+ RelationSetSingleRLockPage(rel, &iptr);
+ break;
+ default:
+ elog(WARN, "_hash_setpagelock: invalid access (%d) on blk %x: %.*s",
+ access, blkno, NAMEDATALEN, RelationGetRelationName(rel));
+ break;
+ }
+ }
+}
+
+static void
+_hash_unsetpagelock(Relation rel,
+ BlockNumber blkno,
+ int access)
+{
+ ItemPointerData iptr;
+
+ if (USELOCKING) {
+ ItemPointerSet(&iptr, blkno, 1);
+
+ switch (access) {
+ case HASH_WRITE:
+ RelationUnsetSingleWLockPage(rel, &iptr);
+ break;
+ case HASH_READ:
+ RelationUnsetSingleRLockPage(rel, &iptr);
+ break;
+ default:
+ elog(WARN, "_hash_unsetpagelock: invalid access (%d) on blk %x: %.*s",
+ access, blkno, NAMEDATALEN, RelationGetRelationName(rel));
+ break;
+ }
+ }
+}
+
+void
+_hash_pagedel(Relation rel, ItemPointer tid)
+{
+ Buffer buf;
+ Buffer metabuf;
+ Page page;
+ BlockNumber blkno;
+ OffsetNumber offno;
+ HashMetaPage metap;
+ HashPageOpaque opaque;
+
+ blkno = ItemPointerGetBlockNumber(tid);
+ offno = ItemPointerGetOffsetNumber(tid);
+
+ buf = _hash_getbuf(rel, blkno, HASH_WRITE);
+ page = BufferGetPage(buf);
+ _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
+ opaque = (HashPageOpaque) PageGetSpecialPointer(page);
+
+ PageIndexTupleDelete(page, offno);
+ _hash_wrtnorelbuf(rel, buf);
+
+ if (PageIsEmpty(page) && (opaque->hasho_flag & LH_OVERFLOW_PAGE)) {
+ buf = _hash_freeovflpage(rel, buf);
+ if (BufferIsValid(buf)) {
+ _hash_relbuf(rel, buf, HASH_WRITE);
+ }
+ } else {
+ _hash_relbuf(rel, buf, HASH_WRITE);
+ }
+
+ metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE);
+ metap = (HashMetaPage) BufferGetPage(metabuf);
+ _hash_checkpage((Page) metap, LH_META_PAGE);
+ ++metap->hashm_nkeys;
+ _hash_wrtbuf(rel, metabuf);
+}
+
/*
 * _hash_expandtable() -- add one bucket to the table (linear-hashing
 *	style growth), then relocate tuples into it.
 *
 *	On entry 'metabuf' holds a read lock on the meta page; each field
 *	update below briefly upgrades it to a write lock via
 *	_hash_chgbufaccess and downgrades it again.
 */
void
_hash_expandtable(Relation rel, Buffer metabuf)
{
	HashMetaPage metap;
	Bucket old_bucket;
	Bucket new_bucket;
	uint32 spare_ndx;

/*	elog(DEBUG, "_hash_expandtable: expanding..."); */

	metap = (HashMetaPage) BufferGetPage(metabuf);
	_hash_checkpage((Page) metap, LH_META_PAGE);

	/* allocate the next bucket number under a write lock */
	metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE);
	new_bucket = ++metap->MAX_BUCKET;
	metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ);
	/* the bucket being split is the new bucket's image under the low mask */
	old_bucket = (metap->MAX_BUCKET & metap->LOW_MASK);

	/*
	 * If the split point is increasing (MAX_BUCKET's log base 2
	 * increases), we need to copy the current contents of the spare
	 * split bucket to the next bucket.
	 */
	spare_ndx = _hash_log2(metap->MAX_BUCKET + 1);
	if (spare_ndx > metap->OVFL_POINT) {

		metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE);
		metap->SPARES[spare_ndx] = metap->SPARES[metap->OVFL_POINT];
		metap->OVFL_POINT = spare_ndx;
		metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ);
	}

	if (new_bucket > metap->HIGH_MASK) {

		/* Starting a new doubling */
		metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_READ, HASH_WRITE);
		metap->LOW_MASK = metap->HIGH_MASK;
		metap->HIGH_MASK = new_bucket | metap->LOW_MASK;
		metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, HASH_WRITE, HASH_READ);

	}
	/* Relocate records to the new bucket */
	_hash_splitpage(rel, metabuf, old_bucket, new_bucket);
}
+
+
/*
 * _hash_splitpage -- split 'obucket' into 'obucket' and 'nbucket'
 *
 *	this routine is actually misnamed -- we are splitting a bucket that
 *	consists of a base bucket page and zero or more overflow (bucket
 *	chain) pages.
 *
 *	Each tuple of the old bucket is re-hashed; those that now map to
 *	'nbucket' are moved, the rest stay put.  Finishes by calling
 *	_hash_squeezebucket on the old bucket to repack it.
 */
static void
_hash_splitpage(Relation rel,
		Buffer metabuf,
		Bucket obucket,
		Bucket nbucket)
{
	Bucket bucket;
	Buffer obuf;
	Buffer nbuf;
	Buffer ovflbuf;
	BlockNumber oblkno;
	BlockNumber nblkno;
	bool null;
	Datum datum;
	HashItem hitem;
	HashPageOpaque oopaque;
	HashPageOpaque nopaque;
	HashMetaPage metap;
	IndexTuple itup;
	int itemsz;
	OffsetNumber ooffnum;
	OffsetNumber noffnum;
	OffsetNumber omaxoffnum;
	Page opage;
	Page npage;
	TupleDesc itupdesc;

/*	elog(DEBUG, "_hash_splitpage: splitting %d into %d,%d",
	     obucket, obucket, nbucket);
*/
	metap = (HashMetaPage) BufferGetPage(metabuf);
	_hash_checkpage((Page) metap, LH_META_PAGE);

	/* get the buffers & pages */
	oblkno = BUCKET_TO_BLKNO(obucket);
	nblkno = BUCKET_TO_BLKNO(nbucket);
	obuf = _hash_getbuf(rel, oblkno, HASH_WRITE);
	nbuf = _hash_getbuf(rel, nblkno, HASH_WRITE);
	opage = BufferGetPage(obuf);
	npage = BufferGetPage(nbuf);

	/* initialize the new bucket */
	_hash_pageinit(npage, BufferGetPageSize(nbuf));
	nopaque = (HashPageOpaque) PageGetSpecialPointer(npage);
	nopaque->hasho_prevblkno = InvalidBlockNumber;
	nopaque->hasho_nextblkno = InvalidBlockNumber;
	nopaque->hasho_flag = LH_BUCKET_PAGE;
	nopaque->hasho_oaddr = InvalidOvflAddress;
	nopaque->hasho_bucket = nbucket;
	_hash_wrtnorelbuf(rel, nbuf);

	/*
	 * make sure the old bucket isn't empty.  advance 'opage' and
	 * friends through the overflow bucket chain until we find a
	 * non-empty page.
	 *
	 * XXX we should only need this once, if we are careful to
	 * preserve the invariant that overflow pages are never empty.
	 */
	_hash_checkpage(opage, LH_BUCKET_PAGE);
	oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
	if (PageIsEmpty(opage)) {
		oblkno = oopaque->hasho_nextblkno;
		_hash_relbuf(rel, obuf, HASH_WRITE);
		if (!BlockNumberIsValid(oblkno)) {
			/*
			 * the old bucket is completely empty; of course, the new
			 * bucket will be as well, but since it's a base bucket
			 * page we don't care.
			 */
			_hash_relbuf(rel, nbuf, HASH_WRITE);
			return;
		}
		obuf = _hash_getbuf(rel, oblkno, HASH_WRITE);
		opage = BufferGetPage(obuf);
		_hash_checkpage(opage, LH_OVERFLOW_PAGE);
		if (PageIsEmpty(opage)) {
			elog(WARN, "_hash_splitpage: empty overflow page %d", oblkno);
		}
		oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
	}

	/*
	 * we are now guaranteed that 'opage' is not empty.  partition the
	 * tuples in the old bucket between the old bucket and the new
	 * bucket, advancing along their respective overflow bucket chains
	 * and adding overflow pages as needed.
	 */
	ooffnum = FirstOffsetNumber;
	omaxoffnum = PageGetMaxOffsetNumber(opage);
	for (;;) {
		/*
		 * at each iteration through this loop, each of these variables
		 * should be up-to-date: obuf opage oopaque ooffnum omaxoffnum
		 */

		/* check if we're at the end of the page */
		if (ooffnum > omaxoffnum) {
			/* at end of page, but check for overflow page */
			oblkno = oopaque->hasho_nextblkno;
			if (BlockNumberIsValid(oblkno)) {
				/*
				 * we ran out of tuples on this particular page, but
				 * we have more overflow pages; re-init values.
				 */
				_hash_wrtbuf(rel, obuf);
				obuf = _hash_getbuf(rel, oblkno, HASH_WRITE);
				opage = BufferGetPage(obuf);
				_hash_checkpage(opage, LH_OVERFLOW_PAGE);
				oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);

				/* we're guaranteed that an ovfl page has at least 1 tuple */
				if (PageIsEmpty(opage)) {
					elog(WARN, "_hash_splitpage: empty ovfl page %d!",
					     oblkno);
				}
				ooffnum = FirstOffsetNumber;
				omaxoffnum = PageGetMaxOffsetNumber(opage);
			} else {
				/*
				 * we're at the end of the bucket chain, so now we're
				 * really done with everything.  before quitting, call
				 * _hash_squeezebucket to ensure the tuples in the
				 * bucket (including the overflow pages) are packed as
				 * tightly as possible.
				 */
				_hash_wrtbuf(rel, obuf);
				_hash_wrtbuf(rel, nbuf);
				_hash_squeezebucket(rel, metap, obucket);
				return;
			}
		}

		/* hash on the tuple's first key attribute to pick its new home */
		hitem = (HashItem) PageGetItem(opage, PageGetItemId(opage, ooffnum));
		itup = &(hitem->hash_itup);
		itupdesc = RelationGetTupleDescriptor(rel);
		datum = index_getattr(itup, 1, itupdesc, &null);
		bucket = _hash_call(rel, metap, datum);

		if (bucket == nbucket) {
			/*
			 * insert the tuple into the new bucket.  if it doesn't
			 * fit on the current page in the new bucket, we must
			 * allocate a new overflow page and place the tuple on
			 * that page instead.
			 */
			itemsz = IndexTupleDSize(hitem->hash_itup)
				+ (sizeof(HashItemData) - sizeof(IndexTupleData));

			itemsz = DOUBLEALIGN(itemsz);

			if (PageGetFreeSpace(npage) < itemsz) {
				ovflbuf = _hash_addovflpage(rel, &metabuf, nbuf);
				_hash_wrtbuf(rel, nbuf);
				nbuf = ovflbuf;
				npage = BufferGetPage(nbuf);
				_hash_checkpage(npage, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
			}

			noffnum = OffsetNumberNext(PageGetMaxOffsetNumber(npage));
			(void) PageAddItem(npage, (Item) hitem, itemsz, noffnum, LP_USED);
			_hash_wrtnorelbuf(rel, nbuf);

			/*
			 * now delete the tuple from the old bucket.  after this
			 * section of code, 'ooffnum' will actually point to the
			 * ItemId to which we would point if we had advanced it
			 * before the deletion (PageIndexTupleDelete repacks the
			 * ItemId array).  this also means that 'omaxoffnum' is
			 * exactly one less than it used to be, so we really can
			 * just decrement it instead of calling
			 * PageGetMaxOffsetNumber.
			 */
			PageIndexTupleDelete(opage, ooffnum);
			_hash_wrtnorelbuf(rel, obuf);
			omaxoffnum = OffsetNumberPrev(omaxoffnum);

			/*
			 * tidy up.  if the old page was an overflow page and it
			 * is now empty, we must free it (we want to preserve the
			 * invariant that overflow pages cannot be empty).
			 */
			if (PageIsEmpty(opage) &&
			    (oopaque->hasho_flag & LH_OVERFLOW_PAGE)) {
				obuf = _hash_freeovflpage(rel, obuf);

				/* check that we're not through the bucket chain */
				if (BufferIsInvalid(obuf)) {
					_hash_wrtbuf(rel, nbuf);
					_hash_squeezebucket(rel, metap, obucket);
					return;
				}

				/*
				 * re-init.  again, we're guaranteed that an ovfl page
				 * has at least one tuple.
				 */
				opage = BufferGetPage(obuf);
				_hash_checkpage(opage, LH_OVERFLOW_PAGE);
				oblkno = BufferGetBlockNumber(obuf);
				oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
				if (PageIsEmpty(opage)) {
					elog(WARN, "_hash_splitpage: empty overflow page %d",
					     oblkno);
				}
				ooffnum = FirstOffsetNumber;
				omaxoffnum = PageGetMaxOffsetNumber(opage);
			}
		} else {
			/*
			 * the tuple stays on this page.  we didn't move anything,
			 * so we didn't delete anything and therefore we don't
			 * have to change 'omaxoffnum'.
			 *
			 * XXX any hash value from [0, nbucket-1] will map to this
			 * bucket, which doesn't make sense to me.
			 */
			ooffnum = OffsetNumberNext(ooffnum);
		}
	}
	/*NOTREACHED*/
}
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * hashscan.c--
+ * manage scans on hash tables
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $Header$
+ *
+ * NOTES
+ * Because we can be doing an index scan on a relation while we
+ * update it, we need to avoid missing data that moves around in
+ * the index. The routines and global variables in this file
+ * guarantee that all scans in the local address space stay
+ * correctly positioned. This is all we need to worry about, since
+ * write locking guarantees that no one else will be on the same
+ * page at the same time as we are.
+ *
+ * The scheme is to manage a list of active scans in the current
+ * backend. Whenever we add or remove records from an index, we
+ * check the list of active scans to see if any has been affected.
+ * A scan is affected only if it is on the same relation, and the
+ * same page, as the update.
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "storage/bufmgr.h"
+#include "storage/bufpage.h"
+
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/rel.h"
+#include "utils/excid.h"
+
+#include "access/heapam.h"
+#include "access/genam.h"
+#include "access/sdir.h"
+#include "access/hash.h"
+
+static void _hash_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno);
+static bool _hash_scantouched(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno);
+
/* one node in this backend's singly-linked list of active hash scans */
typedef struct HashScanListData {
	IndexScanDesc hashsl_scan;		/* the registered scan */
	struct HashScanListData *hashsl_next;	/* next entry in the list */
} HashScanListData;

typedef HashScanListData *HashScanList;

/* head of the active-scan list; local to this backend */
static HashScanList HashScans = (HashScanList) NULL;
+
+/*
+ * _Hash_regscan() -- register a new scan.
+ */
+void
+_hash_regscan(IndexScanDesc scan)
+{
+ HashScanList new_el;
+
+ new_el = (HashScanList) palloc(sizeof(HashScanListData));
+ new_el->hashsl_scan = scan;
+ new_el->hashsl_next = HashScans;
+ HashScans = new_el;
+}
+
+/*
+ * _hash_dropscan() -- drop a scan from the scan list
+ */
+void
+_hash_dropscan(IndexScanDesc scan)
+{
+ HashScanList chk, last;
+
+ last = (HashScanList) NULL;
+ for (chk = HashScans;
+ chk != (HashScanList) NULL && chk->hashsl_scan != scan;
+ chk = chk->hashsl_next) {
+ last = chk;
+ }
+
+ if (chk == (HashScanList) NULL)
+ elog(WARN, "hash scan list trashed; can't find 0x%lx", scan);
+
+ if (last == (HashScanList) NULL)
+ HashScans = chk->hashsl_next;
+ else
+ last->hashsl_next = chk->hashsl_next;
+
+#ifdef PERFECT_MEM
+ pfree (chk);
+#endif /* PERFECT_MEM */
+}
+
+void
+_hash_adjscans(Relation rel, ItemPointer tid)
+{
+ HashScanList l;
+ Oid relid;
+
+ relid = rel->rd_id;
+ for (l = HashScans; l != (HashScanList) NULL; l = l->hashsl_next) {
+ if (relid == l->hashsl_scan->relation->rd_id)
+ _hash_scandel(l->hashsl_scan, ItemPointerGetBlockNumber(tid),
+ ItemPointerGetOffsetNumber(tid));
+ }
+}
+
/*
 * _hash_scandel() -- reposition 'scan' if its current position or mark
 *	would be invalidated by deleting the tuple at (blkno, offno).
 *	Affected positions are stepped backward one tuple so the scan
 *	neither skips nor revisits tuples after the page is compacted.
 */
static void
_hash_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno)
{
	ItemPointer current;
	Buffer buf;
	Buffer metabuf;
	HashScanOpaque so;

	if (!_hash_scantouched(scan, blkno, offno))
		return;

	/* NOTE(review): metabuf is only handed to _hash_step below;
	 * presumably _hash_step consumes/releases it -- confirm, else
	 * each call leaks a metapage buffer reference. */
	metabuf = _hash_getbuf(scan->relation, HASH_METAPAGE, HASH_READ);

	so = (HashScanOpaque) scan->opaque;
	buf = so->hashso_curbuf;

	/* adjust the scan's current position, if it is affected */
	current = &(scan->currentItemData);
	if (ItemPointerIsValid(current)
	    && ItemPointerGetBlockNumber(current) == blkno
	    && ItemPointerGetOffsetNumber(current) >= offno) {
		_hash_step(scan, &buf, BackwardScanDirection, metabuf);
		so->hashso_curbuf = buf;
	}

	/* adjust the scan's mark, if it is affected */
	current = &(scan->currentMarkData);
	if (ItemPointerIsValid(current)
	    && ItemPointerGetBlockNumber(current) == blkno
	    && ItemPointerGetOffsetNumber(current) >= offno) {
		ItemPointerData tmp;
		/* _hash_step operates on currentItemData, so temporarily
		 * swap the mark into it, step, then swap back */
		tmp = *current;
		*current = scan->currentItemData;
		scan->currentItemData = tmp;
		_hash_step(scan, &buf, BackwardScanDirection, metabuf);
		so->hashso_mrkbuf = buf;
		tmp = *current;
		*current = scan->currentItemData;
		scan->currentItemData = tmp;
	}
}
+
+static bool
+_hash_scantouched(IndexScanDesc scan,
+ BlockNumber blkno,
+ OffsetNumber offno)
+{
+ ItemPointer current;
+
+ current = &(scan->currentItemData);
+ if (ItemPointerIsValid(current)
+ && ItemPointerGetBlockNumber(current) == blkno
+ && ItemPointerGetOffsetNumber(current) >= offno)
+ return (true);
+
+ current = &(scan->currentMarkData);
+ if (ItemPointerIsValid(current)
+ && ItemPointerGetBlockNumber(current) == blkno
+ && ItemPointerGetOffsetNumber(current) >= offno)
+ return (true);
+
+ return (false);
+}
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * hashsearch.c--
+ * search code for postgres hash tables
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $Header$
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "storage/bufmgr.h"
+#include "storage/bufpage.h"
+
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/rel.h"
+#include "utils/excid.h"
+
+#include "fmgr.h"
+
+#include "access/heapam.h"
+#include "access/genam.h"
+#include "access/skey.h"
+#include "access/sdir.h"
+#include "access/hash.h"
+
+/*
+ * _hash_search() -- Finds the page/bucket that the contains the
+ * scankey and loads it into *bufP. the buffer has a read lock.
+ */
+void
+_hash_search(Relation rel,
+ int keysz,
+ ScanKey scankey,
+ Buffer *bufP,
+ HashMetaPage metap)
+{
+ BlockNumber blkno;
+ Datum keyDatum;
+ Bucket bucket;
+
+ if (scankey == (ScanKey) NULL ||
+ (keyDatum = scankey[0].sk_argument) == (Datum) NULL) {
+ /*
+ * If the scankey argument is NULL, all tuples will satisfy
+ * the scan so we start the scan at the first bucket (bucket
+ * 0).
+ */
+ bucket = 0;
+ } else {
+ bucket = _hash_call(rel, metap, keyDatum);
+ }
+
+ blkno = BUCKET_TO_BLKNO(bucket);
+
+ *bufP = _hash_getbuf(rel, blkno, HASH_READ);
+}
+
+/*
+ * _hash_next() -- Get the next item in a scan.
+ *
+ * On entry, we have a valid currentItemData in the scan, and a
+ * read lock on the page that contains that item. We do not have
+ * the page pinned. We return the next item in the scan. On
+ * exit, we have the page containing the next item locked but not
+ * pinned.
+ */
RetrieveIndexResult
_hash_next(IndexScanDesc scan, ScanDirection dir)
{
	Relation rel;
	Buffer buf;
	Buffer metabuf;
	Page page;
	OffsetNumber offnum;
	RetrieveIndexResult res;
	ItemPointer current;
	ItemPointer iptr;
	HashItem hitem;
	IndexTuple itup;
	HashScanOpaque so;

	rel = scan->relation;
	so = (HashScanOpaque) scan->opaque;
	current = &(scan->currentItemData);

	/* read-lock the metapage; _hash_step releases it for us */
	metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);

	/*
	 * XXX 10 may 91:  somewhere there's a bug in our management of the
	 * cached buffer for this scan.  wei discovered it.  the following
	 * is a workaround so he can work until i figure out what's going on.
	 */

	if (!BufferIsValid(so->hashso_curbuf)) {
		/* re-pin the page named by currentItemData */
		so->hashso_curbuf = _hash_getbuf(rel,
						 ItemPointerGetBlockNumber(current),
						 HASH_READ);
	}

	/* we still have the buffer pinned and locked */
	buf = so->hashso_curbuf;

	/*
	 * step to next valid tuple.  note that _hash_step releases our
	 * lock on 'metabuf'; if we switch to a new 'buf' while looking
	 * for the next tuple, we come back with a lock on that buffer.
	 */
	if (!_hash_step(scan, &buf, dir, metabuf)) {
		return ((RetrieveIndexResult) NULL);
	}

	/* if we're here, _hash_step found a valid tuple */
	current = &(scan->currentItemData);
	offnum = ItemPointerGetOffsetNumber(current);
	page = BufferGetPage(buf);
	_hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
	hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum));
	itup = &hitem->hash_itup;
	/* the result pairs the index position with a copy of the heap tid */
	iptr = (ItemPointer) palloc(sizeof(ItemPointerData));
	memmove((char *) iptr, (char *) &(itup->t_tid), sizeof(ItemPointerData));
	res = FormRetrieveIndexResult(current, iptr);

	return (res);
}
+
+static void
+_hash_readnext(Relation rel,
+ Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
+{
+ BlockNumber blkno;
+
+ blkno = (*opaquep)->hasho_nextblkno;
+ _hash_relbuf(rel, *bufp, HASH_READ);
+ *bufp = InvalidBuffer;
+ if (BlockNumberIsValid(blkno)) {
+ *bufp = _hash_getbuf(rel, blkno, HASH_READ);
+ *pagep = BufferGetPage(*bufp);
+ _hash_checkpage(*pagep, LH_OVERFLOW_PAGE);
+ *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
+ Assert(!PageIsEmpty(*pagep));
+ }
+}
+
+static void
+_hash_readprev(Relation rel,
+ Buffer *bufp, Page *pagep, HashPageOpaque *opaquep)
+{
+ BlockNumber blkno;
+
+ blkno = (*opaquep)->hasho_prevblkno;
+ _hash_relbuf(rel, *bufp, HASH_READ);
+ *bufp = InvalidBuffer;
+ if (BlockNumberIsValid(blkno)) {
+ *bufp = _hash_getbuf(rel, blkno, HASH_READ);
+ *pagep = BufferGetPage(*bufp);
+ _hash_checkpage(*pagep, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
+ *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep);
+ if (PageIsEmpty(*pagep)) {
+ Assert((*opaquep)->hasho_flag & LH_BUCKET_PAGE);
+ _hash_relbuf(rel, *bufp, HASH_READ);
+ *bufp = InvalidBuffer;
+ }
+ }
+}
+
+/*
+ * _hash_first() -- Find the first item in a scan.
+ *
+ * Return the RetrieveIndexResult of the first item in the tree that
 *	satisfies the qualification associated with the scan descriptor. On
+ * exit, the page containing the current index tuple is read locked
+ * and pinned, and the scan's opaque data entry is updated to
+ * include the buffer.
+ */
RetrieveIndexResult
_hash_first(IndexScanDesc scan, ScanDirection dir)
{
	Relation rel;
	Buffer buf;
	Buffer metabuf;
	Page page;
	HashPageOpaque opaque;
	HashMetaPage metap;
	HashItem hitem;
	IndexTuple itup;
	ItemPointer current;
	ItemPointer iptr;
	OffsetNumber offnum;
	RetrieveIndexResult res;
	HashScanOpaque so;

	rel = scan->relation;
	so = (HashScanOpaque) scan->opaque;
	current = &(scan->currentItemData);

	/* read-lock the metapage; _hash_step releases it when called */
	metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);
	metap = (HashMetaPage) BufferGetPage(metabuf);
	_hash_checkpage((Page) metap, LH_META_PAGE);

	/*
	 * XXX -- The attribute number stored in the scan key is the attno
	 *	  in the heap relation.  We need to transmogrify this into
	 *	  the index relation attno here.  For the moment, we have
	 *	  hardwired attno == 1.
	 */

	/* find the correct bucket page and load it into buf */
	_hash_search(rel, 1, scan->keyData, &buf, metap);
	page = BufferGetPage(buf);
	_hash_checkpage(page, LH_BUCKET_PAGE);
	opaque = (HashPageOpaque) PageGetSpecialPointer(page);

	/*
	 * if we are scanning forward, we need to find the first non-empty
	 * page (if any) in the bucket chain.  since overflow pages are
	 * never empty, this had better be either the bucket page or the
	 * first overflow page.
	 *
	 * if we are scanning backward, we always go all the way to the
	 * end of the bucket chain.
	 */
	if (PageIsEmpty(page)) {
		if (BlockNumberIsValid(opaque->hasho_nextblkno)) {
			_hash_readnext(rel, &buf, &page, &opaque);
		} else {
			/*
			 * empty bucket with no overflow pages: scan is done.
			 * NOTE(review): 'metabuf' is still read-locked on
			 * this early-return path -- confirm whether it
			 * should be released here.
			 */
			ItemPointerSetInvalid(current);
			so->hashso_curbuf = InvalidBuffer;
			return ((RetrieveIndexResult) NULL);
		}
	}
	if (ScanDirectionIsBackward(dir)) {
		while (BlockNumberIsValid(opaque->hasho_nextblkno)) {
			_hash_readnext(rel, &buf, &page, &opaque);
		}
	}

	if (!_hash_step(scan, &buf, dir, metabuf)) {
		return ((RetrieveIndexResult) NULL);
	}

	/* if we're here, _hash_step found a valid tuple */
	current = &(scan->currentItemData);
	offnum = ItemPointerGetOffsetNumber(current);
	page = BufferGetPage(buf);
	_hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
	hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum));
	itup = &hitem->hash_itup;
	/* the result pairs the index position with a copy of the heap tid */
	iptr = (ItemPointer) palloc(sizeof(ItemPointerData));
	memmove((char *) iptr, (char *) &(itup->t_tid), sizeof(ItemPointerData));
	res = FormRetrieveIndexResult(current, iptr);

	return (res);
}
+
/*
 * _hash_step() -- step to the next valid item in a scan in the bucket.
 *
 *	If no valid record exists in the requested direction, return
 *	false.  Else, return true and set the CurrentItemData for the
 *	scan to the right thing.
 *
 *	'bufP' points to the buffer which contains the current page
 *	that we'll step through.
 *
 *	'metabuf' is released when this returns.
 */
bool
_hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir, Buffer metabuf)
{
	Relation rel;
	ItemPointer current;
	HashScanOpaque so;
	int allbuckets;
	HashMetaPage metap;
	Buffer buf;
	Page page;
	HashPageOpaque opaque;
	OffsetNumber maxoff;
	OffsetNumber offnum;
	Bucket bucket;
	BlockNumber blkno;
	HashItem hitem;
	IndexTuple itup;

	rel = scan->relation;
	current = &(scan->currentItemData);
	so = (HashScanOpaque) scan->opaque;
	/* a keyless scan must visit every bucket, not just the hashed one */
	allbuckets = (scan->numberOfKeys < 1);

	metap = (HashMetaPage) BufferGetPage(metabuf);
	_hash_checkpage((Page) metap, LH_META_PAGE);

	buf = *bufP;
	page = BufferGetPage(buf);
	_hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE);
	opaque = (HashPageOpaque) PageGetSpecialPointer(page);

	/*
	 * If _hash_step is called from _hash_first, current will not be
	 * valid, so we can't dereference it.  However, in that case, we
	 * presumably want to start at the beginning/end of the page...
	 */
	maxoff = PageGetMaxOffsetNumber(page);
	if (ItemPointerIsValid(current)) {
		offnum = ItemPointerGetOffsetNumber(current);
	} else {
		offnum = InvalidOffsetNumber;
	}

	/*
	 * 'offnum' now points to the last tuple we have seen (if any).
	 *
	 * continue to step through tuples until:
	 *	1) we get to the end of the bucket chain or
	 *	2) we find a valid tuple.
	 */
	do {
		bucket = opaque->hasho_bucket;

		switch (dir) {
		case ForwardScanDirection:
			if (offnum != InvalidOffsetNumber) {
				offnum = OffsetNumberNext(offnum);	/* move forward */
			} else {
				offnum = FirstOffsetNumber;	/* new page */
			}
			while (offnum > maxoff) {
				/*
				 * either this page is empty (maxoff ==
				 * InvalidOffsetNumber) or we ran off the end.
				 */
				_hash_readnext(rel, &buf, &page, &opaque);
				if (BufferIsInvalid(buf)) {	/* end of chain */
					if (allbuckets && bucket < metap->hashm_maxbucket) {
						/* cross over into the next bucket's chain */
						++bucket;
						blkno = BUCKET_TO_BLKNO(bucket);
						buf = _hash_getbuf(rel, blkno, HASH_READ);
						page = BufferGetPage(buf);
						_hash_checkpage(page, LH_BUCKET_PAGE);
						opaque = (HashPageOpaque) PageGetSpecialPointer(page);
						Assert(opaque->hasho_bucket == bucket);
						/* skip past an empty bucket page */
						while (PageIsEmpty(page) &&
						       BlockNumberIsValid(opaque->hasho_nextblkno)) {
							_hash_readnext(rel, &buf, &page, &opaque);
						}
						maxoff = PageGetMaxOffsetNumber(page);
						offnum = FirstOffsetNumber;
					} else {
						maxoff = offnum = InvalidOffsetNumber;
						break;	/* while */
					}
				} else {
					/* _hash_readnext never returns an empty page */
					maxoff = PageGetMaxOffsetNumber(page);
					offnum = FirstOffsetNumber;
				}
			}
			break;
		case BackwardScanDirection:
			if (offnum != InvalidOffsetNumber) {
				offnum = OffsetNumberPrev(offnum);	/* move back */
			} else {
				offnum = maxoff;	/* new page */
			}
			while (offnum < FirstOffsetNumber) {
				/*
				 * either this page is empty (offnum ==
				 * InvalidOffsetNumber) or we ran off the end.
				 */
				_hash_readprev(rel, &buf, &page, &opaque);
				if (BufferIsInvalid(buf)) {	/* end of chain */
					if (allbuckets && bucket > 0) {
						/* back into the previous bucket's chain... */
						--bucket;
						blkno = BUCKET_TO_BLKNO(bucket);
						buf = _hash_getbuf(rel, blkno, HASH_READ);
						page = BufferGetPage(buf);
						_hash_checkpage(page, LH_BUCKET_PAGE);
						opaque = (HashPageOpaque) PageGetSpecialPointer(page);
						Assert(opaque->hasho_bucket == bucket);
						/* ...and all the way to its last page */
						while (BlockNumberIsValid(opaque->hasho_nextblkno)) {
							_hash_readnext(rel, &buf, &page, &opaque);
						}
						maxoff = offnum = PageGetMaxOffsetNumber(page);
					} else {
						maxoff = offnum = InvalidOffsetNumber;
						break;	/* while */
					}
				} else {
					/* _hash_readprev never returns an empty page */
					maxoff = offnum = PageGetMaxOffsetNumber(page);
				}
			}
			break;
		default:
			/* NoMovementScanDirection */
			/* this should not be reached */
			break;
		}

		/* we ran off the end of the world without finding a match */
		if (offnum == InvalidOffsetNumber) {
			_hash_relbuf(rel, metabuf, HASH_READ);
			*bufP = so->hashso_curbuf = InvalidBuffer;
			ItemPointerSetInvalid(current);
			return(false);
		}

		/* get ready to check this tuple */
		hitem = (HashItem) PageGetItem(page, PageGetItemId(page, offnum));
		itup = &hitem->hash_itup;
	} while (!_hash_checkqual(scan, itup));

	/* if we made it to here, we've found a valid tuple */
	_hash_relbuf(rel, metabuf, HASH_READ);
	blkno = BufferGetBlockNumber(buf);
	*bufP = so->hashso_curbuf = buf;
	ItemPointerSet(current, blkno, offnum);
	return(true);
}
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
 * hashstrat.c--
 *	Strategy map entries for the hash indexed access method
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $Header$
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "storage/bufpage.h"
+
+#include "utils/elog.h"
+#include "utils/rel.h"
+#include "utils/excid.h"
+
+#include "access/genam.h"
+#include "access/hash.h"
+
/*
 * only one valid strategy for hash tables: equality.
 */

/* negating equality yields no hash-indexable strategy */
static StrategyNumber HTNegate[1] = {
	InvalidStrategy
};

/* equality is its own commutator */
static StrategyNumber HTCommute[1] = {
	HTEqualStrategyNumber
};

/* negated-commuted equality: likewise not hash-indexable */
static StrategyNumber HTNegateCommute[1] = {
	InvalidStrategy
};

/* strategy-evaluation tables handed to the generic strategy machinery */
static StrategyEvaluationData HTEvaluationData = {
	/* XXX static for simplicity */

	HTMaxStrategyNumber,
	(StrategyTransformMap)HTNegate,
	(StrategyTransformMap)HTCommute,
	(StrategyTransformMap)HTNegateCommute,
	{NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL}
};
+
+/* ----------------------------------------------------------------
+ * RelationGetHashStrategy
+ * ----------------------------------------------------------------
+ */
+
+StrategyNumber
+_hash_getstrat(Relation rel,
+ AttrNumber attno,
+ RegProcedure proc)
+{
+ StrategyNumber strat;
+
+ strat = RelationGetStrategy(rel, attno, &HTEvaluationData, proc);
+
+ Assert(StrategyNumberIsValid(strat));
+
+ return (strat);
+}
+
+bool
+_hash_invokestrat(Relation rel,
+ AttrNumber attno,
+ StrategyNumber strat,
+ Datum left,
+ Datum right)
+{
+ return (RelationInvokeStrategy(rel, &HTEvaluationData, attno, strat,
+ left, right));
+}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
 * hashutil.c--
 *	Utility code for Postgres hash implementation.
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $Header$
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "storage/bufmgr.h"
+#include "storage/bufpage.h"
+
+#include "fmgr.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/rel.h"
+#include "utils/excid.h"
+
+#include "access/heapam.h"
+#include "access/genam.h"
+#include "access/iqual.h"
+#include "access/hash.h"
+
+ScanKey
+_hash_mkscankey(Relation rel, IndexTuple itup, HashMetaPage metap)
+{
+ ScanKey skey;
+ TupleDesc itupdesc;
+ int natts;
+ AttrNumber i;
+ Datum arg;
+ RegProcedure proc;
+ bool null;
+
+ natts = rel->rd_rel->relnatts;
+ itupdesc = RelationGetTupleDescriptor(rel);
+
+ skey = (ScanKey) palloc(natts * sizeof(ScanKeyData));
+
+ for (i = 0; i < natts; i++) {
+ arg = index_getattr(itup, i + 1, itupdesc, &null);
+ proc = metap->hashm_procid;
+ ScanKeyEntryInitialize(&skey[i],
+ 0x0, (AttrNumber) (i + 1), proc, arg);
+ }
+
+ return (skey);
+}
+
/*
 * _hash_freeskey -- release a scankey array built by _hash_mkscankey().
 */
void
_hash_freeskey(ScanKey skey)
{
	pfree(skey);
}
+
+
+bool
+_hash_checkqual(IndexScanDesc scan, IndexTuple itup)
+{
+ if (scan->numberOfKeys > 0)
+ return (index_keytest(itup,
+ RelationGetTupleDescriptor(scan->relation),
+ scan->numberOfKeys, scan->keyData));
+ else
+ return (true);
+}
+
+HashItem
+_hash_formitem(IndexTuple itup)
+{
+ int nbytes_hitem;
+ HashItem hitem;
+ Size tuplen;
+
+ /* disallow nulls in hash keys */
+ if (itup->t_info & INDEX_NULL_MASK)
+ elog(WARN, "hash indices cannot include null keys");
+
+ /* make a copy of the index tuple with room for the sequence number */
+ tuplen = IndexTupleSize(itup);
+ nbytes_hitem = tuplen +
+ (sizeof(HashItemData) - sizeof(IndexTupleData));
+
+ hitem = (HashItem) palloc(nbytes_hitem);
+ memmove((char *) &(hitem->hash_itup), (char *) itup, tuplen);
+
+ return (hitem);
+}
+
+Bucket
+_hash_call(Relation rel, HashMetaPage metap, Datum key)
+{
+ uint32 n;
+ Bucket bucket;
+ RegProcedure proc;
+
+ proc = metap->hashm_procid;
+ n = (uint32) fmgr(proc, key);
+ bucket = n & metap->hashm_highmask;
+ if (bucket > metap->hashm_maxbucket)
+ bucket = bucket & metap->hashm_lowmask;
+ return (bucket);
+}
+
+/*
+ * _hash_log2 -- returns ceil(lg2(num))
+ */
+uint32
+_hash_log2(uint32 num)
+{
+ uint32 i, limit;
+
+ limit = 1;
+ for (i = 0; limit < num; limit = limit << 1, i++)
+ ;
+ return (i);
+}
+
/*
 * _hash_checkpage -- sanity checks on the format of all hash pages
 *
 *	'flags' is a mask of LH_* page-type bits; if nonzero, the page's
 *	special-space flag word must have at least one of them set.
 */
void
_hash_checkpage(Page page, int flags)
{
	PageHeader ph = (PageHeader) page;
	HashPageOpaque opaque;

	Assert(page);
	/* pd_lower must leave room for the fixed page header */
	Assert(ph->pd_lower >= (sizeof(PageHeaderData) - sizeof(ItemIdData)));
#if 1
	/* special space must hold exactly the hash opaque data */
	Assert(ph->pd_upper <=
	       (BLCKSZ - DOUBLEALIGN(sizeof(HashPageOpaqueData))));
	Assert(ph->pd_special ==
	       (BLCKSZ - DOUBLEALIGN(sizeof(HashPageOpaqueData))));
	Assert(ph->pd_opaque.od_pagesize == BLCKSZ);
#endif
	if (flags) {
		opaque = (HashPageOpaque) PageGetSpecialPointer(page);
		Assert(opaque->hasho_flag & flags);
	}
}
--- /dev/null
+#-------------------------------------------------------------------------
+#
+# Makefile.inc--
+# Makefile for access/heap
+#
+# Copyright (c) 1994, Regents of the University of California
+#
+#
+# IDENTIFICATION
+# $Header$
+#
+#-------------------------------------------------------------------------
+
+SUBSRCS+= heapam.c hio.c stats.c
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * heapam.c--
+ * heap access method code
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $Header$
+ *
+ *
+ * INTERFACE ROUTINES
+ * heapgettup - fetch next heap tuple from a scan
+ * heap_open - open a heap relation by relationId
+ * heap_openr - open a heap relation by name
+ * heap_close - close a heap relation
+ * heap_beginscan - begin relation scan
+ * heap_rescan - restart a relation scan
+ * heap_endscan - end relation scan
+ * heap_getnext - retrieve next tuple in scan
 *	heap_fetch	- retrieve tuple with tid
+ * heap_insert - insert tuple into a relation
+ * heap_delete - delete a tuple from a relation
+ * heap_replace - replace a tuple in a relation with another tuple
+ * heap_markpos - mark scan position
+ * heap_restrpos - restore position to marked location
+ *
+ * NOTES
+ * This file contains the heap_ routines which implement
+ * the POSTGRES heap access method used for all POSTGRES
+ * relations.
+ *
+ * OLD COMMENTS
+ * struct relscan hints: (struct should be made AM independent?)
+ *
+ * rs_ctid is the tid of the last tuple returned by getnext.
+ * rs_ptid and rs_ntid are the tids of the previous and next tuples
+ * returned by getnext, respectively. NULL indicates an end of
 *	scan (either direction); NON indicates an unknown value.
+ *
+ * possible combinations:
+ * rs_p rs_c rs_n interpretation
+ * NULL NULL NULL empty scan
 *	NULL	NULL	NON	at beginning of scan
 *	NULL	NULL	t1	at beginning of scan (with cached tid)
+ * NON NULL NULL at end of scan
+ * t1 NULL NULL at end of scan (with cached tid)
+ * NULL t1 NULL just returned only tuple
+ * NULL t1 NON just returned first tuple
+ * NULL t1 t2 returned first tuple (with cached tid)
+ * NON t1 NULL just returned last tuple
+ * t2 t1 NULL returned last tuple (with cached tid)
+ * t1 t2 NON in the middle of a forward scan
+ * NON t2 t1 in the middle of a reverse scan
+ * ti tj tk in the middle of a scan (w cached tid)
+ *
+ * Here NULL is ...tup == NULL && ...buf == InvalidBuffer,
+ * and NON is ...tup == NULL && ...buf == UnknownBuffer.
+ *
+ * Currently, the NONTID values are not cached with their actual
+ * values by getnext. Values may be cached by markpos since it stores
+ * all three tids.
+ *
+ * NOTE: the calls to elog() must stop. Should decide on an interface
+ * between the general and specific AM calls.
+ *
+ * XXX probably do not need a free tuple routine for heaps.
+ * Huh? Free tuple is not necessary for tuples returned by scans, but
+ * is necessary for tuples which are returned by
+ * RelationGetTupleByItemPointer. -hirohama
+ *
+ *-------------------------------------------------------------------------
+ */
+#include <sys/file.h>
+#include <string.h>
+
+#include "postgres.h"
+
+#include "access/attnum.h"
+#include "access/heapam.h"
+#include "access/hio.h"
+#include "access/htup.h"
+#include "access/relscan.h"
+#include "access/skey.h"
+
+#include "utils/tqual.h"
+#include "access/valid.h"
+#include "access/xact.h"
+
+#include "catalog/catalog.h"
+#include "catalog/catname.h"
+#include "storage/buf.h"
+#include "storage/bufmgr.h"
+#include "storage/bufpage.h"
+#include "storage/itemid.h"
+#include "storage/itemptr.h"
+#include "storage/lmgr.h"
+
+#include "tcop/tcopdebug.h"
+#include "miscadmin.h"
+
+#include "utils/memutils.h"
+#include "utils/palloc.h"
+#include "fmgr.h"
+#include "utils/inval.h"
+#include "utils/elog.h"
+#include "utils/mcxt.h"
+#include "utils/rel.h"
+#include "utils/relcache.h"
+
+static bool ImmediateInvalidation;
+
+/* ----------------------------------------------------------------
+ * heap support routines
+ * ----------------------------------------------------------------
+ */
+
/* ----------------
 *	initsdesc - sdesc code common to heap_beginscan and heap_rescan
 *
 *	Initializes the tuple/buffer slots, marked-position pointers,
 *	and (optionally) the key array of an already-allocated scan
 *	descriptor.  'atend' true means the scan starts at the end of
 *	the relation (reverse scan).  UnknownBuffer in the leading slot
 *	marks the "NON" (unknown) state described in the file header.
 * ----------------
 */
static void
initsdesc(HeapScanDesc sdesc,
	  Relation relation,
	  int atend,
	  unsigned nkeys,
	  ScanKey key)
{
	if (!RelationGetNumberOfBlocks(relation)) {
		/* ----------------
		 *	relation is empty
		 * ----------------
		 */
		sdesc->rs_ntup = sdesc->rs_ctup = sdesc->rs_ptup = NULL;
		sdesc->rs_nbuf = sdesc->rs_cbuf = sdesc->rs_pbuf = InvalidBuffer;
	} else if (atend) {
		/* ----------------
		 *	reverse scan: previous slot is "unknown"
		 * ----------------
		 */
		sdesc->rs_ntup = sdesc->rs_ctup = NULL;
		sdesc->rs_nbuf = sdesc->rs_cbuf = InvalidBuffer;
		sdesc->rs_ptup = NULL;
		sdesc->rs_pbuf = UnknownBuffer;
	} else {
		/* ----------------
		 *	forward scan: next slot is "unknown"
		 * ----------------
		 */
		sdesc->rs_ctup = sdesc->rs_ptup = NULL;
		sdesc->rs_cbuf = sdesc->rs_pbuf = InvalidBuffer;
		sdesc->rs_ntup = NULL;
		sdesc->rs_nbuf = UnknownBuffer;
	} /* invalid too */

	/* we don't have a marked position... */
	ItemPointerSetInvalid(&(sdesc->rs_mptid));
	ItemPointerSetInvalid(&(sdesc->rs_mctid));
	ItemPointerSetInvalid(&(sdesc->rs_mntid));
	ItemPointerSetInvalid(&(sdesc->rs_mcd));

	/* ----------------
	 *	copy the scan key, if appropriate
	 *	(rs_key storage is allocated by heap_beginscan)
	 * ----------------
	 */
	if (key != NULL)
		memmove(sdesc->rs_key, key, nkeys * sizeof(ScanKeyData));
}
+
+/* ----------------
+ * unpinsdesc - code common to heap_rescan and heap_endscan
+ * ----------------
+ */
+static void
+unpinsdesc(HeapScanDesc sdesc)
+{
+ if (BufferIsValid(sdesc->rs_pbuf)) {
+ ReleaseBuffer(sdesc->rs_pbuf);
+ }
+
+ /* ------------------------------------
+ * Scan will pin buffer one for each non-NULL tuple pointer
+ * (ptup, ctup, ntup), so they have to be unpinned multiple
+ * times.
+ * ------------------------------------
+ */
+ if (BufferIsValid(sdesc->rs_cbuf)) {
+ ReleaseBuffer(sdesc->rs_cbuf);
+ }
+
+ if (BufferIsValid(sdesc->rs_nbuf)) {
+ ReleaseBuffer(sdesc->rs_nbuf);
+ }
+}
+
/* ------------------------------------------
 *	nextpage
 *
 *	figure out the next page to scan after the current page,
 *	taking the scan direction into account
 * ------------------------------------------
 */
static int
nextpage(int page, int dir)
{
	/* step backward for a reverse scan, forward otherwise */
	if (dir < 0)
		return (page - 1);
	return (page + 1);
}
+
/* ----------------
 *	heapgettup - fetch next heap tuple
 *
 *	routine used by heap_getnext() which does most of the
 *	real work in scanning tuples.
 *
 *	'tid' is the last position returned (invalid to start the scan);
 *	'dir' < 0, == 0, > 0 selects backward, no-movement, or forward
 *	motion; '*b' is the scan's cached buffer, which is replaced as
 *	the scan crosses pages and set to InvalidBuffer at end of scan.
 *	Returns the next tuple satisfying 'timeQual' and 'key', or NULL.
 * ----------------
 */
static HeapTuple
heapgettup(Relation relation,
	   ItemPointer tid,
	   int dir,
	   Buffer *b,
	   TimeQual timeQual,
	   int nkeys,
	   ScanKey key)
{
	ItemId lpp;
	Page dp;
	int page;
	int pages;
	int lines;
	HeapTuple rtup;
	OffsetNumber lineoff;
	int linesleft;

	/* ----------------
	 *	increment access statistics
	 * ----------------
	 */
	IncrHeapAccessStat(local_heapgettup);
	IncrHeapAccessStat(global_heapgettup);

	/* ----------------
	 *	debugging stuff
	 *
	 * check validity of arguments, here and for other functions too
	 * Note: no locking manipulations needed--this is a local function
	 * ----------------
	 */
#ifdef HEAPDEBUGALL
	if (ItemPointerIsValid(tid)) {
		elog(DEBUG, "heapgettup(%.16s, tid=0x%x[%d,%d], dir=%d, ...)",
		     RelationGetRelationName(relation), tid, tid->ip_blkid,
		     tid->ip_posid, dir);
	} else {
		elog(DEBUG, "heapgettup(%.16s, tid=0x%x, dir=%d, ...)",
		     RelationGetRelationName(relation), tid, dir);
	}
	elog(DEBUG, "heapgettup(..., b=0x%x, timeQ=0x%x, nkeys=%d, key=0x%x",
	     b, timeQual, nkeys, key);
	if (timeQual == SelfTimeQual) {
		elog(DEBUG, "heapgettup: relation(%c)=`%.16s', SelfTimeQual",
		     relation->rd_rel->relkind, &relation->rd_rel->relname);
	} else {
		elog(DEBUG, "heapgettup: relation(%c)=`%.16s', timeQual=%d",
		     relation->rd_rel->relkind, &relation->rd_rel->relname,
		     timeQual);
	}
#endif /* !defined(HEAPDEBUGALL) */

	if (!ItemPointerIsValid(tid)) {
		Assert(!PointerIsValid(tid));
	}

	/* ----------------
	 *	return null immediately if relation is empty
	 * ----------------
	 */
	if (!(pages = relation->rd_nblocks))
		return (NULL);

	/* ----------------
	 *	calculate next starting lineoff, given scan direction
	 * ----------------
	 */
	if (!dir) {
		/* ----------------
		 * ``no movement'' scan direction: refetch the tuple at 'tid'
		 * ----------------
		 */
		/* assume it is a valid TID XXX */
		if (ItemPointerIsValid(tid) == false) {
			*b = InvalidBuffer;
			return (NULL);
		}
		*b = RelationGetBufferWithBuffer(relation,
						 ItemPointerGetBlockNumber(tid),
						 *b);

#ifndef NO_BUFFERISVALID
		if (!BufferIsValid(*b)) {
			elog(WARN, "heapgettup: failed ReadBuffer");
		}
#endif

		dp = (Page) BufferGetPage(*b);
		lineoff = ItemPointerGetOffsetNumber(tid);
		lpp = PageGetItemId(dp, lineoff);

		/* note: no time-qual or key check on the no-movement path */
		rtup = (HeapTuple)PageGetItem((Page) dp, lpp);
		return (rtup);

	} else if (dir < 0) {
		/* ----------------
		 *	reverse scan direction
		 * ----------------
		 */
		if (ItemPointerIsValid(tid) == false) {
			tid = NULL;
		}
		if (tid == NULL) {
			page = pages - 1;	/* final page */
		} else {
			page = ItemPointerGetBlockNumber(tid);	/* current page */
		}
		if (page < 0) {
			*b = InvalidBuffer;
			return (NULL);
		}

		*b = RelationGetBufferWithBuffer(relation, page, *b);
#ifndef NO_BUFFERISVALID
		if (!BufferIsValid(*b)) {
			elog(WARN, "heapgettup: failed ReadBuffer");
		}
#endif

		dp = (Page) BufferGetPage(*b);
		lines = PageGetMaxOffsetNumber(dp);
		if (tid == NULL) {
			lineoff = lines;	/* final offnum */
		} else {
			lineoff =		/* previous offnum */
				OffsetNumberPrev(ItemPointerGetOffsetNumber(tid));
		}
		/* page and lineoff now reference the physically previous tid */

	} else {
		/* ----------------
		 *	forward scan direction
		 * ----------------
		 */
		if (ItemPointerIsValid(tid) == false) {
			page = 0;			/* first page */
			lineoff = FirstOffsetNumber;	/* first offnum */
		} else {
			page = ItemPointerGetBlockNumber(tid);	/* current page */
			lineoff =		/* next offnum */
				OffsetNumberNext(ItemPointerGetOffsetNumber(tid));
		}

		if (page >= pages) {
			*b = InvalidBuffer;
			return (NULL);
		}
		/* page and lineoff now reference the physically next tid */

		*b = RelationGetBufferWithBuffer(relation, page, *b);
#ifndef NO_BUFFERISVALID
		if (!BufferIsValid(*b)) {
			elog(WARN, "heapgettup: failed ReadBuffer");
		}
#endif

		dp = (Page) BufferGetPage(*b);
		lines = PageGetMaxOffsetNumber(dp);
	}

	/* 'dir' is now non-zero */

	/* ----------------
	 *	calculate line pointer and number of remaining items
	 *	to check on this page.
	 * ----------------
	 */
	lpp = PageGetItemId(dp, lineoff);
	if (dir < 0) {
		linesleft = lineoff - 1;
	} else {
		linesleft = lines - lineoff;
	}

	/* ----------------
	 *	advance the scan until we find a qualifying tuple or
	 *	run out of stuff to scan
	 * ----------------
	 */
	for (;;) {
		while (linesleft >= 0) {
			/* ----------------
			 *	if current tuple qualifies, return it.
			 * ----------------
			 */
			if ((rtup = heap_tuple_satisfies(lpp, relation, (PageHeader) dp,
							 timeQual, nkeys, key)) != NULL) {
				ItemPointer iptr = &(rtup->t_ctid);
				if (ItemPointerGetBlockNumber(iptr) != page) {
					/*
					 * set block id to the correct page number
					 * --- this is a hack to support the virtual fragment
					 * concept
					 */
					ItemPointerSetBlockNumber(iptr, page);
				}
				return (rtup);
			}

			/* ----------------
			 *	otherwise move to the next item on the page
			 * ----------------
			 */
			--linesleft;
			if (dir < 0) {
				--lpp;	/* move back in this page's ItemId array */
			} else {
				++lpp;	/* move forward in this page's ItemId array */
			}
		}

		/* ----------------
		 *	if we get here, it means we've exhausted the items on
		 *	this page and it's time to move to the next..
		 * ----------------
		 */
		page = nextpage(page, dir);

		/* ----------------
		 *	return NULL if we've exhausted all the pages..
		 * ----------------
		 */
		if (page < 0 || page >= pages) {
			if (BufferIsValid(*b))
				ReleaseBuffer(*b);
			*b = InvalidBuffer;
			return (NULL);
		}

		*b = ReleaseAndReadBuffer(*b, relation, page);

#ifndef NO_BUFFERISVALID
		if (!BufferIsValid(*b)) {
			elog(WARN, "heapgettup: failed ReadBuffer");
		}
#endif
		/* reset per-page state for the freshly read page */
		dp = (Page) BufferGetPage(*b);
		lines = lineoff = PageGetMaxOffsetNumber((Page) dp);
		linesleft = lines - 1;
		if (dir < 0) {
			lpp = PageGetItemId(dp, lineoff);
		} else {
			lpp = PageGetItemId(dp, FirstOffsetNumber);
		}
	}
}
+
+void
+doinsert(Relation relation, HeapTuple tup)
+{
+ RelationPutHeapTupleAtEnd(relation, tup);
+ return;
+}
+
+/*
+ * HeapScanIsValid is now a macro in relscan.h -cim 4/27/91
+ */
+
/* ----------------
 *	SetHeapAccessMethodImmediateInvalidation
 *
 *	Set the file-local ImmediateInvalidation flag.
 * ----------------
 */
void
SetHeapAccessMethodImmediateInvalidation(bool on)
{
	ImmediateInvalidation = on;
}
+
+/* ----------------------------------------------------------------
+ * heap access method interface
+ * ----------------------------------------------------------------
+ */
+/* ----------------
+ * heap_open - open a heap relation by relationId
+ *
+ * presently the relcache routines do all the work we need
+ * to open/close heap relations.
+ * ----------------
+ */
+Relation
+heap_open(Oid relationId)
+{
+ Relation r;
+
+ /* ----------------
+ * increment access statistics
+ * ----------------
+ */
+ IncrHeapAccessStat(local_open);
+ IncrHeapAccessStat(global_open);
+
+ r = (Relation) RelationIdGetRelation(relationId);
+
+ if (RelationIsValid(r) && r->rd_rel->relkind == RELKIND_INDEX) {
+ elog(WARN, "%s is an index relation", r->rd_rel->relname.data);
+ }
+
+ return (r);
+}
+
+/* ----------------
+ * heap_openr - open a heap relation by name
+ *
+ * presently the relcache routines do all the work we need
+ * to open/close heap relations.
+ * ----------------
+ */
+Relation
+heap_openr(char *relationName)
+{
+ Relation r;
+
+ /* ----------------
+ * increment access statistics
+ * ----------------
+ */
+ IncrHeapAccessStat(local_openr);
+ IncrHeapAccessStat(global_openr);
+
+ r = RelationNameGetRelation(relationName);
+
+ if (RelationIsValid(r) && r->rd_rel->relkind == RELKIND_INDEX) {
+ elog(WARN, "%s is an index relation", r->rd_rel->relname.data);
+ }
+
+ return (r);
+}
+
+/* ----------------
+ * heap_close - close a heap relation
+ *
+ * presently the relcache routines do all the work we need
+ * to open/close heap relations.
+ * ----------------
+ */
+void
+heap_close(Relation relation)
+{
+ /* ----------------
+ * increment access statistics
+ * ----------------
+ */
+ IncrHeapAccessStat(local_close);
+ IncrHeapAccessStat(global_close);
+
+ (void) RelationClose(relation);
+}
+
+
+/* ----------------
+ * heap_beginscan - begin relation scan
+ * ----------------
+ */
+HeapScanDesc
+heap_beginscan(Relation relation,
+ int atend,
+ TimeQual timeQual,
+ unsigned nkeys,
+ ScanKey key)
+{
+ HeapScanDesc sdesc;
+
+ /* ----------------
+ * increment access statistics
+ * ----------------
+ */
+ IncrHeapAccessStat(local_beginscan);
+ IncrHeapAccessStat(global_beginscan);
+
+ /* ----------------
+ * sanity checks
+ * ----------------
+ */
+ if (RelationIsValid(relation) == false)
+ elog(WARN, "heap_beginscan: !RelationIsValid(relation)");
+
+ /* ----------------
+ * set relation level read lock
+ * ----------------
+ */
+ RelationSetLockForRead(relation);
+
+ /* XXX someday assert SelfTimeQual if relkind == RELKIND_UNCATALOGED */
+ if (relation->rd_rel->relkind == RELKIND_UNCATALOGED) {
+ timeQual = SelfTimeQual;
+ }
+
+ /* ----------------
+ * increment relation ref count while scanning relation
+ * ----------------
+ */
+ RelationIncrementReferenceCount(relation);
+
+ /* ----------------
+ * allocate and initialize scan descriptor
+ * ----------------
+ */
+ sdesc = (HeapScanDesc) palloc(sizeof(HeapScanDescData));
+
+ relation->rd_nblocks = smgrnblocks(relation->rd_rel->relsmgr, relation);
+ sdesc->rs_rd = relation;
+
+ if (nkeys) {
+ /*
+ * we do this here instead of in initsdesc() because heap_rescan also
+ * calls initsdesc() and we don't want to allocate memory again
+ */
+ sdesc->rs_key = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys);
+ } else {
+ sdesc->rs_key = NULL;
+ }
+
+ initsdesc(sdesc, relation, atend, nkeys, key);
+
+ sdesc->rs_atend = atend;
+ sdesc->rs_tr = timeQual;
+ sdesc->rs_nkeys = (short)nkeys;
+
+ return (sdesc);
+}
+
+/* ----------------
+ * heap_rescan - restart a relation scan
+ * ----------------
+ */
+void
+heap_rescan(HeapScanDesc sdesc,
+ bool scanFromEnd,
+ ScanKey key)
+{
+ /* ----------------
+ * increment access statistics
+ * ----------------
+ */
+ IncrHeapAccessStat(local_rescan);
+ IncrHeapAccessStat(global_rescan);
+
+ /* Note: set relation level read lock is still set */
+
+ /* ----------------
+ * unpin scan buffers
+ * ----------------
+ */
+ unpinsdesc(sdesc);
+
+ /* ----------------
+ * reinitialize scan descriptor
+ * ----------------
+ */
+ initsdesc(sdesc, sdesc->rs_rd, scanFromEnd, sdesc->rs_nkeys, key);
+ sdesc->rs_atend = (bool) scanFromEnd;
+}
+
+/* ----------------
+ * heap_endscan - end relation scan
+ *
+ * See how to integrate with index scans.
+ * Check handling if reldesc caching.
+ * ----------------
+ */
+void
+heap_endscan(HeapScanDesc sdesc)
+{
+ /* ----------------
+ * increment access statistics
+ * ----------------
+ */
+ IncrHeapAccessStat(local_endscan);
+ IncrHeapAccessStat(global_endscan);
+
+ /* Note: no locking manipulations needed */
+
+ /* ----------------
+ * unpin scan buffers
+ * ----------------
+ */
+ unpinsdesc(sdesc);
+
+ /* ----------------
+ * decrement relation reference count and free scan descriptor storage
+ * ----------------
+ */
+ RelationDecrementReferenceCount(sdesc->rs_rd);
+
+ /* ----------------
+ * Non 2-phase read locks on catalog relations
+ * ----------------
+ */
+ if ( IsSystemRelationName(RelationGetRelationName(sdesc->rs_rd)->data) )
+
+ RelationUnsetLockForRead(sdesc->rs_rd);
+
+ pfree(sdesc); /* XXX */
+}
+
+/* ----------------
+ * heap_getnext - retrieve next tuple in scan
+ *
+ * Fix to work with index relations.
+ * ----------------
+ */
+
/*
 * Tracing macros for heap_getnext; all are no-ops unless HEAPDEBUGALL
 * is defined.  Note: none of the definitions ends in a semicolon --
 * use sites write "HEAPDEBUG_n;" themselves.  (HEAPDEBUG_7 previously
 * carried a trailing ';', which made "HEAPDEBUG_7;" expand to two
 * statements and would misbehave in an unbraced if/else.)
 */
#ifdef HEAPDEBUGALL
#define HEAPDEBUG_1 \
elog(DEBUG, "heap_getnext([%s,nkeys=%d],backw=%d,0x%x) called", \
     sdesc->rs_rd->rd_rel->relname.data, sdesc->rs_nkeys, backw, b)

#define HEAPDEBUG_2 \
    elog(DEBUG, "heap_getnext called with backw (no tracing yet)")

#define HEAPDEBUG_3 \
    elog(DEBUG, "heap_getnext returns NULL at end")

#define HEAPDEBUG_4 \
    elog(DEBUG, "heap_getnext valid buffer UNPIN'd")

#define HEAPDEBUG_5 \
    elog(DEBUG, "heap_getnext next tuple was cached")

#define HEAPDEBUG_6 \
    elog(DEBUG, "heap_getnext returning EOS")

#define HEAPDEBUG_7 \
    elog(DEBUG, "heap_getnext returning tuple")
#else
#define HEAPDEBUG_1
#define HEAPDEBUG_2
#define HEAPDEBUG_3
#define HEAPDEBUG_4
#define HEAPDEBUG_5
#define HEAPDEBUG_6
#define HEAPDEBUG_7
#endif /* !defined(HEAPDEBUGALL) */
+
+
/*
 * heap_getnext -- return the next tuple of the scan, or NULL at end.
 *
 * backw != 0 requests backward motion.  If 'b' is non-NULL, *b is set
 * to the buffer holding the returned tuple (still pinned); otherwise
 * the pin bookkeeping is purely internal.
 *
 * The scan descriptor caches three positions: previous
 * (rs_ptup/rs_pbuf), current (rs_ctup/rs_cbuf), and next
 * (rs_ntup/rs_nbuf).  Each valid slot holds its own pin on the page.
 * A slot whose buffer is UnknownBuffer has simply never been loaded
 * ("NONTUP"); InvalidBuffer in the pbuf (resp. nbuf) slot marks the
 * start (resp. end) of the scan.
 */
HeapTuple
heap_getnext(HeapScanDesc scandesc,
	     int backw,
	     Buffer *b)
{
    register HeapScanDesc sdesc = scandesc;
    Buffer localb;

    /* ----------------
     *	increment access statistics
     * ----------------
     */
    IncrHeapAccessStat(local_getnext);
    IncrHeapAccessStat(global_getnext);

    /* Note: no locking manipulations needed */

    /* ----------------
     *	argument checks
     * ----------------
     */
    if (sdesc == NULL)
	elog(WARN, "heap_getnext: NULL relscan");

    /* ----------------
     *	initialize return buffer to InvalidBuffer
     *	(use a local dummy if the caller doesn't want the buffer back)
     * ----------------
     */
    if (! PointerIsValid(b)) b = &localb;
    (*b) = InvalidBuffer;

    HEAPDEBUG_1; /* heap_getnext( info ) */

    if (backw) {
	/* ----------------
	 *	handle reverse scan
	 * ----------------
	 */
	HEAPDEBUG_2; /* heap_getnext called with backw */

	/* rs_pbuf == InvalidBuffer with ptup == ctup marks start of scan */
	if (sdesc->rs_ptup == sdesc->rs_ctup &&
	    BufferIsInvalid(sdesc->rs_pbuf))
	{
	    if (BufferIsValid(sdesc->rs_nbuf))
		ReleaseBuffer(sdesc->rs_nbuf);
	    return (NULL);
	}

	/*
	 * Copy the "current" tuple/buffer
	 * to "next". Pin/unpin the buffers
	 * accordingly
	 */
	if (sdesc->rs_nbuf != sdesc->rs_cbuf) {
	    if (BufferIsValid(sdesc->rs_nbuf))
		ReleaseBuffer(sdesc->rs_nbuf);
	    if (BufferIsValid(sdesc->rs_cbuf))
		IncrBufferRefCount(sdesc->rs_cbuf);
	}
	sdesc->rs_ntup = sdesc->rs_ctup;
	sdesc->rs_nbuf = sdesc->rs_cbuf;

	if (sdesc->rs_ptup != NULL) {
	    /* the previous tuple is cached: shift it into "current" */
	    if (sdesc->rs_cbuf != sdesc->rs_pbuf) {
		if (BufferIsValid(sdesc->rs_cbuf))
		    ReleaseBuffer(sdesc->rs_cbuf);
		if (BufferIsValid(sdesc->rs_pbuf))
		    IncrBufferRefCount(sdesc->rs_pbuf);
	    }
	    sdesc->rs_ctup = sdesc->rs_ptup;
	    sdesc->rs_cbuf = sdesc->rs_pbuf;
	} else { /* NONTUP: previous slot never loaded -- fetch from disk */
	    ItemPointer iptr;

	    iptr = (sdesc->rs_ctup != NULL) ?
		&(sdesc->rs_ctup->t_ctid) : (ItemPointer) NULL;

	    /* Don't release sdesc->rs_cbuf at this point, because
	       heapgettup doesn't increase PrivateRefCount if it
	       is already set. On a backward scan, both rs_ctup and rs_ntup
	       usually point to the same buffer page, so
	       PrivateRefCount[rs_cbuf] should be 2 (or more, if for instance
	       ctup is stored in a TupleTableSlot).  - 01/09/94 */

	    sdesc->rs_ctup = (HeapTuple)
		heapgettup(sdesc->rs_rd,
			   iptr,
			   -1,
			   &(sdesc->rs_cbuf),
			   sdesc->rs_tr,
			   sdesc->rs_nkeys,
			   sdesc->rs_key);
	}

	/* no tuple and no buffer: scan is exhausted -- release everything */
	if (sdesc->rs_ctup == NULL && !BufferIsValid(sdesc->rs_cbuf))
	{
	    if (BufferIsValid(sdesc->rs_pbuf))
		ReleaseBuffer(sdesc->rs_pbuf);
	    sdesc->rs_ptup = NULL;
	    sdesc->rs_pbuf = InvalidBuffer;
	    if (BufferIsValid(sdesc->rs_nbuf))
		ReleaseBuffer(sdesc->rs_nbuf);
	    sdesc->rs_ntup = NULL;
	    sdesc->rs_nbuf = InvalidBuffer;
	    return (NULL);
	}

	/* reset "previous" to the not-yet-loaded (NONTUP) state */
	if (BufferIsValid(sdesc->rs_pbuf))
	    ReleaseBuffer(sdesc->rs_pbuf);
	sdesc->rs_ptup = NULL;
	sdesc->rs_pbuf = UnknownBuffer;

    } else {
	/* ----------------
	 *	handle forward scan
	 * ----------------
	 */
	/* rs_nbuf == InvalidBuffer with ctup == ntup marks end of scan */
	if (sdesc->rs_ctup == sdesc->rs_ntup &&
	    BufferIsInvalid(sdesc->rs_nbuf)) {
	    if (BufferIsValid(sdesc->rs_pbuf))
		ReleaseBuffer(sdesc->rs_pbuf);
	    HEAPDEBUG_3; /* heap_getnext returns NULL at end */
	    return (NULL);
	}

	/*
	 * Copy the "current" tuple/buffer
	 * to "previous". Pin/unpin the buffers
	 * accordingly
	 */
	if (sdesc->rs_pbuf != sdesc->rs_cbuf) {
	    if (BufferIsValid(sdesc->rs_pbuf))
		ReleaseBuffer(sdesc->rs_pbuf);
	    if (BufferIsValid(sdesc->rs_cbuf))
		IncrBufferRefCount(sdesc->rs_cbuf);
	}
	sdesc->rs_ptup = sdesc->rs_ctup;
	sdesc->rs_pbuf = sdesc->rs_cbuf;

	if (sdesc->rs_ntup != NULL) {
	    /* the next tuple is cached: shift it into "current" */
	    if (sdesc->rs_cbuf != sdesc->rs_nbuf) {
		if (BufferIsValid(sdesc->rs_cbuf))
		    ReleaseBuffer(sdesc->rs_cbuf);
		if (BufferIsValid(sdesc->rs_nbuf))
		    IncrBufferRefCount(sdesc->rs_nbuf);
	    }
	    sdesc->rs_ctup = sdesc->rs_ntup;
	    sdesc->rs_cbuf = sdesc->rs_nbuf;
	    HEAPDEBUG_5; /* heap_getnext next tuple was cached */
	} else { /* NONTUP: next slot never loaded -- fetch from disk */
	    ItemPointer iptr;

	    iptr = (sdesc->rs_ctup != NULL) ?
		&sdesc->rs_ctup->t_ctid : (ItemPointer) NULL;

	    /* Don't release sdesc->rs_cbuf at this point, because
	       heapgettup doesn't increase PrivateRefCount if it
	       is already set. On a forward scan, both rs_ctup and rs_ptup
	       usually point to the same buffer page, so
	       PrivateRefCount[rs_cbuf] should be 2 (or more, if for instance
	       ctup is stored in a TupleTableSlot).  - 01/09/93 */

	    sdesc->rs_ctup = (HeapTuple)
		heapgettup(sdesc->rs_rd,
			   iptr,
			   1,
			   &sdesc->rs_cbuf,
			   sdesc->rs_tr,
			   sdesc->rs_nkeys,
			   sdesc->rs_key);
	}

	/* no tuple and no buffer: scan is exhausted -- release everything */
	if (sdesc->rs_ctup == NULL && !BufferIsValid(sdesc->rs_cbuf)) {
	    if (BufferIsValid(sdesc->rs_nbuf))
		ReleaseBuffer(sdesc->rs_nbuf);
	    sdesc->rs_ntup = NULL;
	    sdesc->rs_nbuf = InvalidBuffer;
	    if (BufferIsValid(sdesc->rs_pbuf))
		ReleaseBuffer(sdesc->rs_pbuf);
	    sdesc->rs_ptup = NULL;
	    sdesc->rs_pbuf = InvalidBuffer;
	    HEAPDEBUG_6; /* heap_getnext returning EOS */
	    return (NULL);
	}

	/* reset "next" to the not-yet-loaded (NONTUP) state */
	if (BufferIsValid(sdesc->rs_nbuf))
	    ReleaseBuffer(sdesc->rs_nbuf);
	sdesc->rs_ntup = NULL;
	sdesc->rs_nbuf = UnknownBuffer;
    }

    /* ----------------
     *	if we get here it means we have a new current scan tuple, so
     *	point to the proper return buffer and return the tuple.
     * ----------------
     */
    (*b) = sdesc->rs_cbuf;

    HEAPDEBUG_7; /* heap_getnext returning tuple */

    return (sdesc->rs_ctup);
}
+
+/* ----------------
+ * heap_fetch - retrive tuple with tid
+ *
+ * Currently ignores LP_IVALID during processing!
+ * ----------------
+ */
+HeapTuple
+heap_fetch(Relation relation,
+ TimeQual timeQual,
+ ItemPointer tid,
+ Buffer *b)
+{
+ ItemId lp;
+ Buffer buffer;
+ PageHeader dp;
+ HeapTuple tuple;
+ OffsetNumber offnum;
+
+ /* ----------------
+ * increment access statistics
+ * ----------------
+ */
+ IncrHeapAccessStat(local_fetch);
+ IncrHeapAccessStat(global_fetch);
+
+ /*
+ * Note: This is collosally expensive - does two system calls per
+ * indexscan tuple fetch. Not good, and since we should be doing
+ * page level locking by the scanner anyway, it is commented out.
+ */
+
+ /* RelationSetLockForTupleRead(relation, tid); */
+
+ /* ----------------
+ * get the buffer from the relation descriptor
+ * Note that this does a buffer pin.
+ * ----------------
+ */
+
+ buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
+
+#ifndef NO_BUFFERISVALID
+ if (!BufferIsValid(buffer)) {
+ elog(WARN, "heap_fetch: %s relation: ReadBuffer(%lx) failed",
+ &relation->rd_rel->relname, (long)tid);
+ }
+#endif
+
+ /* ----------------
+ * get the item line pointer corresponding to the requested tid
+ * ----------------
+ */
+ dp = (PageHeader) BufferGetPage(buffer);
+ offnum = ItemPointerGetOffsetNumber(tid);
+ lp = PageGetItemId(dp, offnum);
+
+ /* ----------------
+ * more sanity checks
+ * ----------------
+ */
+
+ Assert(ItemIdIsUsed(lp));
+
+ /* ----------------
+ * check time qualification of tid
+ * ----------------
+ */
+
+ tuple = heap_tuple_satisfies(lp, relation, dp,
+ timeQual, 0,(ScanKey)NULL);
+
+ if (tuple == NULL)
+ {
+ ReleaseBuffer(buffer);
+ return (NULL);
+ }
+
+ /* ----------------
+ * all checks passed, now either return a copy of the tuple
+ * or pin the buffer page and return a pointer, depending on
+ * whether caller gave us a valid b.
+ * ----------------
+ */
+
+ if (PointerIsValid(b)) {
+ *b = buffer;
+ } else {
+ tuple = heap_copytuple(tuple);
+ ReleaseBuffer(buffer);
+ }
+ return (tuple);
+}
+
+/* ----------------
+ * heap_insert - insert tuple
+ *
+ * The assignment of t_min (and thus the others) should be
+ * removed eventually.
+ *
+ * Currently places the tuple onto the last page. If there is no room,
+ * it is placed on new pages. (Heap relations)
+ * Note that concurrent inserts during a scan will probably have
+ * unexpected results, though this will be fixed eventually.
+ *
+ * Fix to work with indexes.
+ * ----------------
+ */
+Oid
+heap_insert(Relation relation, HeapTuple tup)
+{
+ /* ----------------
+ * increment access statistics
+ * ----------------
+ */
+ IncrHeapAccessStat(local_insert);
+ IncrHeapAccessStat(global_insert);
+
+ /* ----------------
+ * set relation level write lock. If this is a "local" relation (not
+ * visible to others), we don't need to set a write lock.
+ * ----------------
+ */
+ if (!relation->rd_islocal)
+ RelationSetLockForWrite(relation);
+
+ /* ----------------
+ * If the object id of this tuple has already been assigned, trust
+ * the caller. There are a couple of ways this can happen. At initial
+ * db creation, the backend program sets oids for tuples. When we
+ * define an index, we set the oid. Finally, in the future, we may
+ * allow users to set their own object ids in order to support a
+ * persistent object store (objects need to contain pointers to one
+ * another).
+ * ----------------
+ */
+ if (!OidIsValid(tup->t_oid)) {
+ tup->t_oid = newoid();
+ LastOidProcessed = tup->t_oid;
+ }
+
+ TransactionIdStore(GetCurrentTransactionId(), &(tup->t_xmin));
+ tup->t_cmin = GetCurrentCommandId();
+ StoreInvalidTransactionId(&(tup->t_xmax));
+ tup->t_tmin = INVALID_ABSTIME;
+ tup->t_tmax = CURRENT_ABSTIME;
+
+ doinsert(relation, tup);
+
+ if ( IsSystemRelationName(RelationGetRelationName(relation)->data)) {
+ RelationUnsetLockForWrite(relation);
+
+ /* ----------------
+ * invalidate caches (only works for system relations)
+ * ----------------
+ */
+ SetRefreshWhenInvalidate(ImmediateInvalidation);
+ RelationInvalidateHeapTuple(relation, tup);
+ SetRefreshWhenInvalidate((bool)!ImmediateInvalidation);
+ }
+
+ return(tup->t_oid);
+}
+
+/* ----------------
+ * heap_delete - delete a tuple
+ *
+ * Must decide how to handle errors.
+ * ----------------
+ */
+void
+heap_delete(Relation relation, ItemPointer tid)
+{
+ ItemId lp;
+ HeapTuple tp;
+ PageHeader dp;
+ Buffer b;
+
+ /* ----------------
+ * increment access statistics
+ * ----------------
+ */
+ IncrHeapAccessStat(local_delete);
+ IncrHeapAccessStat(global_delete);
+
+ /* ----------------
+ * sanity check
+ * ----------------
+ */
+ Assert(ItemPointerIsValid(tid));
+
+ /* ----------------
+ * set relation level write lock
+ * ----------------
+ */
+ RelationSetLockForWrite(relation);
+
+ b = ReadBuffer(relation, ItemPointerGetBlockNumber(tid));
+
+#ifndef NO_BUFFERISVALID
+ if (!BufferIsValid(b)) { /* XXX L_SH better ??? */
+ elog(WARN, "heap_delete: failed ReadBuffer");
+ }
+#endif /* NO_BUFFERISVALID */
+
+ dp = (PageHeader) BufferGetPage(b);
+ lp = PageGetItemId(dp, ItemPointerGetOffsetNumber(tid));
+
+ /* ----------------
+ * check that we're deleteing a valid item
+ * ----------------
+ */
+ if (!(tp = heap_tuple_satisfies(lp, relation, dp,
+ NowTimeQual, 0, (ScanKey) NULL))) {
+
+ /* XXX call something else */
+ ReleaseBuffer(b);
+
+ elog(WARN, "heap_delete: (am)invalid tid");
+ }
+
+ /* ----------------
+ * get the tuple and lock tell the buffer manager we want
+ * exclusive access to the page
+ * ----------------
+ */
+
+ /* ----------------
+ * store transaction information of xact deleting the tuple
+ * ----------------
+ */
+ TransactionIdStore(GetCurrentTransactionId(), &(tp->t_xmax));
+ tp->t_cmax = GetCurrentCommandId();
+ ItemPointerSetInvalid(&tp->t_chain);
+
+ /* ----------------
+ * invalidate caches
+ * ----------------
+ */
+ SetRefreshWhenInvalidate(ImmediateInvalidation);
+ RelationInvalidateHeapTuple(relation, tp);
+ SetRefreshWhenInvalidate((bool)!ImmediateInvalidation);
+
+ WriteBuffer(b);
+ if ( IsSystemRelationName(RelationGetRelationName(relation)->data) )
+ RelationUnsetLockForWrite(relation);
+}
+
/* ----------------
 *	heap_replace - replace a tuple
 *
 *	Logically deletes the tuple at 'otid' and inserts 'tup' in its
 *	place (same page if it fits, otherwise via doinsert).  Returns 0
 *	on success.
 *
 *	Must decide how to handle errors.
 *
 *	Fix arguments, work with indexes.
 *
 *	12/30/93 - modified the return value to be 1 when
 *	           a non-functional update is detected. This
 *	           prevents the calling routine from updating
 *	           indices unnecessarily. -kw
 *
 * ----------------
 */
int
heap_replace(Relation relation, ItemPointer otid, HeapTuple tup)
{
    ItemId lp;
    HeapTuple tp;
    Page dp;
    Buffer buffer;

    /* ----------------
     *	increment access statistics
     * ----------------
     */
    IncrHeapAccessStat(local_replace);
    IncrHeapAccessStat(global_replace);

    /* ----------------
     *	sanity checks
     * ----------------
     */
    Assert(ItemPointerIsValid(otid));

    /* ----------------
     *	set relation level write lock -- skipped for backend-local
     *	relations, which nobody else can see
     * ----------------
     */
    if (!relation->rd_islocal)
	RelationSetLockForWrite(relation);

    buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(otid));
#ifndef NO_BUFFERISVALID
    if (!BufferIsValid(buffer)) {
	/* XXX L_SH better ??? */
	elog(WARN, "amreplace: failed ReadBuffer");
    }
#endif /* NO_BUFFERISVALID */

    dp = (Page) BufferGetPage(buffer);
    lp = PageGetItemId(dp, ItemPointerGetOffsetNumber(otid));

    /* ----------------
     *	logically delete old item
     * ----------------
     */

    tp = (HeapTuple) PageGetItem(dp, lp);
    Assert(HeapTupleIsValid(tp));

    /* -----------------
     *	the following test should be able to catch all non-functional
     *	update attempts and shut out all ghost tuples.
     *	XXX In the future, Spyros may need to update the rule lock on a tuple
     *	more than once within the same command and same transaction.
     *	He will have to introduce a new flag to override the following check.
     *	-- Wei
     *
     * -----------------
     */

    if (TupleUpdatedByCurXactAndCmd(tp)) {
	/* second update of the same tuple in one command: report it,
	   release our pin and (for catalogs) the write lock, and tell
	   the caller not to update indices (return 1) */
	elog(NOTICE, "Non-functional update, only first update is performed");
	if ( IsSystemRelationName(RelationGetRelationName(relation)->data) )
	    RelationUnsetLockForWrite(relation);
	ReleaseBuffer(buffer);
	return(1);
    }

    /* ----------------
     *	check that we're replacing a valid item -
     *
     *	NOTE that this check must follow the non-functional update test
     *	above as it can happen that we try to 'replace' the same tuple
     *	twice in a single transaction.  The second time around the
     *	tuple will fail the NowTimeQual.  We don't want to abort the
     *	xact, we only want to flag the 'non-functional' NOTICE. -mer
     * ----------------
     */
    if (!heap_tuple_satisfies(lp,
			      relation,
			      (PageHeader)dp,
			      NowTimeQual,
			      0,
			      (ScanKey)NULL))
    {
	ReleaseBuffer(buffer);
	elog(WARN, "heap_replace: (am)invalid otid");
    }

    /* XXX order problems if not atomic assignment ??? */
    /* the new version keeps the old tuple's OID but gets fresh
       transaction/command stamps */
    tup->t_oid = tp->t_oid;
    TransactionIdStore(GetCurrentTransactionId(), &(tup->t_xmin));
    tup->t_cmin = GetCurrentCommandId();
    StoreInvalidTransactionId(&(tup->t_xmax));
    tup->t_tmin = INVALID_ABSTIME;
    tup->t_tmax = CURRENT_ABSTIME;
    ItemPointerSetInvalid(&tup->t_chain);

    /* ----------------
     *	insert new item: same page when it fits, else find a new home
     * ----------------
     */
    if ((unsigned)DOUBLEALIGN(tup->t_len) <= PageGetFreeSpace((Page) dp)) {
	RelationPutHeapTuple(relation, BufferGetBlockNumber(buffer), tup);
    } else {
	/* ----------------
	 *	new item won't fit on same page as old item, have to look
	 *	for a new place to put it.
	 * ----------------
	 */
	doinsert(relation, tup);
    }

    /* ----------------
     *	new item in place, now record transaction information:
     *	mark the old version as superseded and chain it to the new one
     * ----------------
     */
    TransactionIdStore(GetCurrentTransactionId(), &(tp->t_xmax));
    tp->t_cmax = GetCurrentCommandId();
    tp->t_chain = tup->t_ctid;

    /* ----------------
     *	invalidate caches
     * ----------------
     */
    SetRefreshWhenInvalidate(ImmediateInvalidation);
    RelationInvalidateHeapTuple(relation, tp);
    SetRefreshWhenInvalidate((bool)!ImmediateInvalidation);

    WriteBuffer(buffer);

    if ( IsSystemRelationName(RelationGetRelationName(relation)->data) )
	RelationUnsetLockForWrite(relation);

    return(0);
}
+
+/* ----------------
+ * heap_markpos - mark scan position
+ *
+ * Note:
+ * Should only one mark be maintained per scan at one time.
+ * Check if this can be done generally--say calls to get the
+ * next/previous tuple and NEVER pass struct scandesc to the
+ * user AM's. Now, the mark is sent to the executor for safekeeping.
+ * Probably can store this info into a GENERAL scan structure.
+ *
+ * May be best to change this call to store the marked position
+ * (up to 2?) in the scan structure itself.
+ * Fix to use the proper caching structure.
+ * ----------------
+ */
+void
+heap_markpos(HeapScanDesc sdesc)
+{
+
+ /* ----------------
+ * increment access statistics
+ * ----------------
+ */
+ IncrHeapAccessStat(local_markpos);
+ IncrHeapAccessStat(global_markpos);
+
+ /* Note: no locking manipulations needed */
+
+ if (sdesc->rs_ptup == NULL &&
+ BufferIsUnknown(sdesc->rs_pbuf)) { /* == NONTUP */
+ sdesc->rs_ptup = (HeapTuple)
+ heapgettup(sdesc->rs_rd,
+ (sdesc->rs_ctup == NULL) ?
+ (ItemPointer)NULL : &sdesc->rs_ctup->t_ctid,
+ -1,
+ &sdesc->rs_pbuf,
+ sdesc->rs_tr,
+ sdesc->rs_nkeys,
+ sdesc->rs_key);
+
+ } else if (sdesc->rs_ntup == NULL &&
+ BufferIsUnknown(sdesc->rs_nbuf)) { /* == NONTUP */
+ sdesc->rs_ntup = (HeapTuple)
+ heapgettup(sdesc->rs_rd,
+ (sdesc->rs_ctup == NULL) ?
+ (ItemPointer)NULL : &sdesc->rs_ctup->t_ctid,
+ 1,
+ &sdesc->rs_nbuf,
+ sdesc->rs_tr,
+ sdesc->rs_nkeys,
+ sdesc->rs_key);
+ }
+
+ /* ----------------
+ * Should not unpin the buffer pages. They may still be in use.
+ * ----------------
+ */
+ if (sdesc->rs_ptup != NULL) {
+ sdesc->rs_mptid = sdesc->rs_ptup->t_ctid;
+ } else {
+ ItemPointerSetInvalid(&sdesc->rs_mptid);
+ }
+ if (sdesc->rs_ctup != NULL) {
+ sdesc->rs_mctid = sdesc->rs_ctup->t_ctid;
+ } else {
+ ItemPointerSetInvalid(&sdesc->rs_mctid);
+ }
+ if (sdesc->rs_ntup != NULL) {
+ sdesc->rs_mntid = sdesc->rs_ntup->t_ctid;
+ } else {
+ ItemPointerSetInvalid(&sdesc->rs_mntid);
+ }
+}
+
+/* ----------------
+ * heap_restrpos - restore position to marked location
+ *
+ * Note: there are bad side effects here. If we were past the end
+ * of a relation when heapmarkpos is called, then if the relation is
+ * extended via insert, then the next call to heaprestrpos will set
+ * cause the added tuples to be visible when the scan continues.
+ * Problems also arise if the TID's are rearranged!!!
+ *
+ * Now pins buffer once for each valid tuple pointer (rs_ptup,
+ * rs_ctup, rs_ntup) referencing it.
+ * - 01/13/94
+ *
+ * XXX might be better to do direct access instead of
+ * using the generality of heapgettup().
+ *
+ * XXX It is very possible that when a scan is restored, that a tuple
+ * XXX which previously qualified may fail for time range purposes, unless
+ * XXX some form of locking exists (ie., portals currently can act funny.
+ * ----------------
+ */
+void
+heap_restrpos(HeapScanDesc sdesc)
+{
+ /* ----------------
+ * increment access statistics
+ * ----------------
+ */
+ IncrHeapAccessStat(local_restrpos);
+ IncrHeapAccessStat(global_restrpos);
+
+ /* XXX no amrestrpos checking that ammarkpos called */
+
+ /* Note: no locking manipulations needed */
+
+ unpinsdesc(sdesc);
+
+ /* force heapgettup to pin buffer for each loaded tuple */
+ sdesc->rs_pbuf = InvalidBuffer;
+ sdesc->rs_cbuf = InvalidBuffer;
+ sdesc->rs_nbuf = InvalidBuffer;
+
+ if (!ItemPointerIsValid(&sdesc->rs_mptid)) {
+ sdesc->rs_ptup = NULL;
+ } else {
+ sdesc->rs_ptup = (HeapTuple)
+ heapgettup(sdesc->rs_rd,
+ &sdesc->rs_mptid,
+ 0,
+ &sdesc->rs_pbuf,
+ NowTimeQual,
+ 0,
+ (ScanKey) NULL);
+ }
+
+ if (!ItemPointerIsValid(&sdesc->rs_mctid)) {
+ sdesc->rs_ctup = NULL;
+ } else {
+ sdesc->rs_ctup = (HeapTuple)
+ heapgettup(sdesc->rs_rd,
+ &sdesc->rs_mctid,
+ 0,
+ &sdesc->rs_cbuf,
+ NowTimeQual,
+ 0,
+ (ScanKey) NULL);
+ }
+
+ if (!ItemPointerIsValid(&sdesc->rs_mntid)) {
+ sdesc->rs_ntup = NULL;
+ } else {
+ sdesc->rs_ntup = (HeapTuple)
+ heapgettup(sdesc->rs_rd,
+ &sdesc->rs_mntid,
+ 0,
+ &sdesc->rs_nbuf,
+ NowTimeQual,
+ 0,
+ (ScanKey) NULL);
+ }
+}
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * hio.c--
+ * POSTGRES heap access method input/output code.
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $Id$
+ *
+ *-------------------------------------------------------------------------
+ */
+#include <string.h>
+
+#include "c.h"
+
+#include "access/heapam.h"
+#include "access/hio.h"
+#include "access/htup.h"
+
+#include "storage/block.h"
+#include "storage/buf.h"
+#include "storage/bufmgr.h"
+#include "storage/bufpage.h"
+#include "storage/itemid.h"
+#include "storage/itemptr.h"
+#include "storage/off.h"
+
+#include "utils/memutils.h"
+#include "utils/elog.h"
+#include "utils/rel.h"
+
+/*
+ * amputunique - place tuple at tid
+ * Currently on errors, calls elog. Perhaps should return -1?
+ * Possible errors include the addition of a tuple to the page
+ * between the time the linep is chosen and the page is L_UP'd.
+ *
+ * This should be coordinated with the B-tree code.
+ * Probably needs to have an amdelunique to allow for
+ * internal index records to be deleted and reordered as needed.
+ * For the heap AM, this should never be needed.
+ */
+void
+RelationPutHeapTuple(Relation relation,
+ BlockNumber blockIndex,
+ HeapTuple tuple)
+{
+ Buffer buffer;
+ Page pageHeader;
+ BlockNumber numberOfBlocks;
+ OffsetNumber offnum;
+ unsigned int len;
+ ItemId itemId;
+ Item item;
+
+ /* ----------------
+ * increment access statistics
+ * ----------------
+ */
+ IncrHeapAccessStat(local_RelationPutHeapTuple);
+ IncrHeapAccessStat(global_RelationPutHeapTuple);
+
+ Assert(RelationIsValid(relation));
+ Assert(HeapTupleIsValid(tuple));
+
+ numberOfBlocks = RelationGetNumberOfBlocks(relation);
+ Assert(blockIndex < numberOfBlocks);
+
+ buffer = ReadBuffer(relation, blockIndex);
+#ifndef NO_BUFFERISVALID
+ if (!BufferIsValid(buffer)) {
+ elog(WARN, "RelationPutHeapTuple: no buffer for %ld in %s",
+ blockIndex, &relation->rd_rel->relname);
+ }
+#endif
+
+ pageHeader = (Page)BufferGetPage(buffer);
+ len = (unsigned)DOUBLEALIGN(tuple->t_len); /* be conservative */
+ Assert((int)len <= PageGetFreeSpace(pageHeader));
+
+ offnum = PageAddItem((Page)pageHeader, (Item)tuple,
+ tuple->t_len, InvalidOffsetNumber, LP_USED);
+
+ itemId = PageGetItemId((Page)pageHeader, offnum);
+ item = PageGetItem((Page)pageHeader, itemId);
+
+ ItemPointerSet(&((HeapTuple)item)->t_ctid, blockIndex, offnum);
+
+ WriteBuffer(buffer);
+ /* return an accurate tuple */
+ ItemPointerSet(&tuple->t_ctid, blockIndex, offnum);
+}
+
/*
 * The heap_insert routines "know" that a buffer page is initialized to
 * zero when a BlockExtend operation is performed.
 *
 * pd_upper == 0 cannot occur on a PageInit'd page (the upper pointer
 * starts at the end of the page), so it identifies a freshly extended,
 * never-initialized page.
 */

#define PageIsNew(page) ((page)->pd_upper == 0)
+
+/*
+ * This routine is another in the series of attempts to reduce the number
+ * of I/O's and system calls executed in the various benchmarks. In
+ * particular, this routine is used to append data to the end of a relation
+ * file without excessive lseeks. This code should do no more than 2 semops
+ * in the ideal case.
+ *
+ * Eventually, we should cache the number of blocks in a relation somewhere.
+ * Until that time, this code will have to do an lseek to determine the number
+ * of blocks in a relation.
+ *
+ * This code should ideally do at most 4 semops, 1 lseek, and possibly 1 write
+ * to do an append; it's possible to eliminate 2 of the semops if we do direct
+ * buffer stuff (!); the lseek and the write can go if we get
+ * RelationGetNumberOfBlocks to be useful.
+ *
+ * NOTE: This code presumes that we have a write lock on the relation.
+ *
+ * Also note that this routine probably shouldn't have to exist, and does
+ * screw up the call graph rather badly, but we are wasting so much time and
+ * system resources being massively general that we are losing badly in our
+ * performance benchmarks.
+ */
/*
 * RelationPutHeapTupleAtEnd -- append 'tuple' to the last page of
 * 'relation', extending the relation with a new zeroed page when the
 * last page is absent, uninitialized, or too full.  Sets t_ctid in
 * both the on-page copy and the caller's tuple.
 *
 * NOTE: presumes the caller holds a write lock on the relation (see
 * the block comment above).
 */
void
RelationPutHeapTupleAtEnd(Relation relation, HeapTuple tuple)
{
    Buffer buffer;
    Page pageHeader;
    BlockNumber lastblock;
    OffsetNumber offnum;
    unsigned int len;
    ItemId itemId;
    Item item;

    Assert(RelationIsValid(relation));
    Assert(HeapTupleIsValid(tuple));

    /*
     * XXX This does an lseek - VERY expensive - but at the moment it
     * is the only way to accurately determine how many blocks are in
     * a relation.  A good optimization would be to get this to actually
     * work properly.
     */

    lastblock = RelationGetNumberOfBlocks(relation);

    if (lastblock == 0)
    {
	/* empty relation: reading block 0 may yield a "bogus" page */
	buffer = ReadBuffer(relation, lastblock);
	pageHeader = (Page)BufferGetPage(buffer);
	if (PageIsNew((PageHeader) pageHeader))
	{
	    /* freshly extended, zero-filled page: initialize it */
	    buffer = ReleaseAndReadBuffer(buffer, relation, P_NEW);
	    pageHeader = (Page)BufferGetPage(buffer);
	    PageInit(pageHeader, BufferGetPageSize(buffer), 0);
	}
    }
    else
	buffer = ReadBuffer(relation, lastblock - 1);

    pageHeader = (Page)BufferGetPage(buffer);
    len = (unsigned)DOUBLEALIGN(tuple->t_len);	/* be conservative */

    /*
     * Note that this is true if the above returned a bogus page, which
     * it will do for a completely empty relation.
     */

    if (len > PageGetFreeSpace(pageHeader))
    {
	/* no room on the last page: extend with a fresh page */
	buffer = ReleaseAndReadBuffer(buffer, relation, P_NEW);
	pageHeader = (Page)BufferGetPage(buffer);
	PageInit(pageHeader, BufferGetPageSize(buffer), 0);

	/* a tuple that doesn't fit even on an empty page is fatal */
	if (len > PageGetFreeSpace(pageHeader))
	    elog(WARN, "Tuple is too big: size %d", len);
    }

    offnum = PageAddItem((Page)pageHeader, (Item)tuple,
			 tuple->t_len, InvalidOffsetNumber, LP_USED);

    itemId = PageGetItemId((Page)pageHeader, offnum);
    item = PageGetItem((Page)pageHeader, itemId);

    /* stamp both the on-page copy and the caller's tuple with the
       final location */
    lastblock = BufferGetBlockNumber(buffer);

    ItemPointerSet(&((HeapTuple)item)->t_ctid, lastblock, offnum);

    /* return an accurate tuple */
    ItemPointerSet(&tuple->t_ctid, lastblock, offnum);

    WriteBuffer(buffer);
}
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * stats.c--
+ * heap access method debugging statistic collection routines
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $Header$
+ *
+ * NOTES
+ * initam should be moved someplace else.
+ *
+ *-------------------------------------------------------------------------
+ */
#include <string.h>

#include "postgres.h"

#include "access/heapam.h"

#include "utils/memutils.h"
#include "utils/palloc.h"
#include "utils/elog.h"
#include "utils/mcxt.h"
+
/* ----------------
 *	InitHeapAccessStatistics
 *
 *	Global statistics block; NULL until InitHeapAccessStatistics
 *	has run.  Lives in TopMemoryContext once allocated.
 * ----------------
 */
HeapAccessStatistics heap_access_stats = (HeapAccessStatistics) NULL;
+
+void
+InitHeapAccessStatistics()
+{
+ MemoryContext oldContext;
+ HeapAccessStatistics stats;
+
+ /* ----------------
+ * make sure we don't initialize things twice
+ * ----------------
+ */
+ if (heap_access_stats != NULL)
+ return;
+
+ /* ----------------
+ * allocate statistics structure from the top memory context
+ * ----------------
+ */
+ oldContext = MemoryContextSwitchTo(TopMemoryContext);
+
+ stats = (HeapAccessStatistics)
+ palloc(sizeof(HeapAccessStatisticsData));
+
+ /* ----------------
+ * initialize fields to default values
+ * ----------------
+ */
+ stats->global_open = 0;
+ stats->global_openr = 0;
+ stats->global_close = 0;
+ stats->global_beginscan = 0;
+ stats->global_rescan = 0;
+ stats->global_endscan = 0;
+ stats->global_getnext = 0;
+ stats->global_fetch = 0;
+ stats->global_insert = 0;
+ stats->global_delete = 0;
+ stats->global_replace = 0;
+ stats->global_markpos = 0;
+ stats->global_restrpos = 0;
+ stats->global_BufferGetRelation = 0;
+ stats->global_RelationIdGetRelation = 0;
+ stats->global_RelationIdGetRelation_Buf = 0;
+ stats->global_getreldesc = 0;
+ stats->global_heapgettup = 0;
+ stats->global_RelationPutHeapTuple = 0;
+ stats->global_RelationPutLongHeapTuple = 0;
+
+ stats->local_open = 0;
+ stats->local_openr = 0;
+ stats->local_close = 0;
+ stats->local_beginscan = 0;
+ stats->local_rescan = 0;
+ stats->local_endscan = 0;
+ stats->local_getnext = 0;
+ stats->local_fetch = 0;
+ stats->local_insert = 0;
+ stats->local_delete = 0;
+ stats->local_replace = 0;
+ stats->local_markpos = 0;
+ stats->local_restrpos = 0;
+ stats->local_BufferGetRelation = 0;
+ stats->local_RelationIdGetRelation = 0;
+ stats->local_RelationIdGetRelation_Buf = 0;
+ stats->local_getreldesc = 0;
+ stats->local_heapgettup = 0;
+ stats->local_RelationPutHeapTuple = 0;
+ stats->local_RelationPutLongHeapTuple = 0;
+ stats->local_RelationNameGetRelation = 0;
+ stats->global_RelationNameGetRelation = 0;
+
+ /* ----------------
+ * record init times
+ * ----------------
+ */
+ time(&stats->init_global_timestamp);
+ time(&stats->local_reset_timestamp);
+ time(&stats->last_request_timestamp);
+
+ /* ----------------
+ * return to old memory context
+ * ----------------
+ */
+ (void) MemoryContextSwitchTo(oldContext);
+
+ heap_access_stats = stats;
+}
+
+/* ----------------
+ * ResetHeapAccessStatistics
+ * ----------------
+ */
+void
+ResetHeapAccessStatistics()
+{
+ HeapAccessStatistics stats;
+
+ /* ----------------
+ * do nothing if stats aren't initialized
+ * ----------------
+ */
+ if (heap_access_stats == NULL)
+ return;
+
+ stats = heap_access_stats;
+
+ /* ----------------
+ * reset local counts
+ * ----------------
+ */
+ stats->local_open = 0;
+ stats->local_openr = 0;
+ stats->local_close = 0;
+ stats->local_beginscan = 0;
+ stats->local_rescan = 0;
+ stats->local_endscan = 0;
+ stats->local_getnext = 0;
+ stats->local_fetch = 0;
+ stats->local_insert = 0;
+ stats->local_delete = 0;
+ stats->local_replace = 0;
+ stats->local_markpos = 0;
+ stats->local_restrpos = 0;
+ stats->local_BufferGetRelation = 0;
+ stats->local_RelationIdGetRelation = 0;
+ stats->local_RelationIdGetRelation_Buf = 0;
+ stats->local_getreldesc = 0;
+ stats->local_heapgettup = 0;
+ stats->local_RelationPutHeapTuple = 0;
+ stats->local_RelationPutLongHeapTuple = 0;
+
+ /* ----------------
+ * reset local timestamps
+ * ----------------
+ */
+ time(&stats->local_reset_timestamp);
+ time(&stats->last_request_timestamp);
+}
+
+/* ----------------
+ * GetHeapAccessStatistics
+ * ----------------
+ */
+HeapAccessStatistics GetHeapAccessStatistics()
+{
+ HeapAccessStatistics stats;
+
+ /* ----------------
+ * return nothing if stats aren't initialized
+ * ----------------
+ */
+ if (heap_access_stats == NULL)
+ return NULL;
+
+ /* ----------------
+ * record the current request time
+ * ----------------
+ */
+ time(&heap_access_stats->last_request_timestamp);
+
+ /* ----------------
+ * allocate a copy of the stats and return it to the caller.
+ * ----------------
+ */
+ stats = (HeapAccessStatistics)
+ palloc(sizeof(HeapAccessStatisticsData));
+
+ memmove(stats,
+ heap_access_stats,
+ sizeof(HeapAccessStatisticsData));
+
+ return stats;
+}
+
+/* ----------------
+ * PrintHeapAccessStatistics
+ * ----------------
+ */
+void
+PrintHeapAccessStatistics(HeapAccessStatistics stats)
+{
+ /* ----------------
+ * return nothing if stats aren't valid
+ * ----------------
+ */
+ if (stats == NULL)
+ return;
+
+ printf("======== heap am statistics ========\n");
+ printf("init_global_timestamp: %s",
+ ctime(&(stats->init_global_timestamp)));
+
+ printf("local_reset_timestamp: %s",
+ ctime(&(stats->local_reset_timestamp)));
+
+ printf("last_request_timestamp: %s",
+ ctime(&(stats->last_request_timestamp)));
+
+ printf("local/global_open: %6d/%6d\n",
+ stats->local_open, stats->global_open);
+
+ printf("local/global_openr: %6d/%6d\n",
+ stats->local_openr, stats->global_openr);
+
+ printf("local/global_close: %6d/%6d\n",
+ stats->local_close, stats->global_close);
+
+ printf("local/global_beginscan: %6d/%6d\n",
+ stats->local_beginscan, stats->global_beginscan);
+
+ printf("local/global_rescan: %6d/%6d\n",
+ stats->local_rescan, stats->global_rescan);
+
+ printf("local/global_endscan: %6d/%6d\n",
+ stats->local_endscan, stats->global_endscan);
+
+ printf("local/global_getnext: %6d/%6d\n",
+ stats->local_getnext, stats->global_getnext);
+
+ printf("local/global_fetch: %6d/%6d\n",
+ stats->local_fetch, stats->global_fetch);
+
+ printf("local/global_insert: %6d/%6d\n",
+ stats->local_insert, stats->global_insert);
+
+ printf("local/global_delete: %6d/%6d\n",
+ stats->local_delete, stats->global_delete);
+
+ printf("local/global_replace: %6d/%6d\n",
+ stats->local_replace, stats->global_replace);
+
+ printf("local/global_markpos: %6d/%6d\n",
+ stats->local_markpos, stats->global_markpos);
+
+ printf("local/global_restrpos: %6d/%6d\n",
+ stats->local_restrpos, stats->global_restrpos);
+
+ printf("================\n");
+
+ printf("local/global_BufferGetRelation: %6d/%6d\n",
+ stats->local_BufferGetRelation,
+ stats->global_BufferGetRelation);
+
+ printf("local/global_RelationIdGetRelation: %6d/%6d\n",
+ stats->local_RelationIdGetRelation,
+ stats->global_RelationIdGetRelation);
+
+ printf("local/global_RelationIdGetRelation_Buf: %6d/%6d\n",
+ stats->local_RelationIdGetRelation_Buf,
+ stats->global_RelationIdGetRelation_Buf);
+
+ printf("local/global_getreldesc: %6d/%6d\n",
+ stats->local_getreldesc, stats->global_getreldesc);
+
+ printf("local/global_heapgettup: %6d/%6d\n",
+ stats->local_heapgettup, stats->global_heapgettup);
+
+ printf("local/global_RelationPutHeapTuple: %6d/%6d\n",
+ stats->local_RelationPutHeapTuple,
+ stats->global_RelationPutHeapTuple);
+
+ printf("local/global_RelationPutLongHeapTuple: %6d/%6d\n",
+ stats->local_RelationPutLongHeapTuple,
+ stats->global_RelationPutLongHeapTuple);
+
+ printf("===================================\n");
+
+ printf("\n");
+}
+
+/* ----------------
+ * PrintAndFreeHeapAccessStatistics
+ * ----------------
+ */
+void
+PrintAndFreeHeapAccessStatistics(HeapAccessStatistics stats)
+{
+ PrintHeapAccessStatistics(stats);
+ if (stats != NULL)
+ pfree(stats);
+}
+
+/* ----------------------------------------------------------------
+ * access method initialization
+ * ----------------------------------------------------------------
+ */
+/* ----------------
+ * initam should someday be moved someplace else.
+ * ----------------
+ */
+void
+initam()
+{
+ /* ----------------
+ * initialize heap statistics.
+ * ----------------
+ */
+ InitHeapAccessStatistics();
+}
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * heapam.h--
+ * POSTGRES heap access method definitions.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id$
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef HEAPAM_H
+#define HEAPAM_H
+
+#include <sys/types.h>
+
+#include "postgres.h"
+
+#include "access/attnum.h"
+#include "access/htup.h"
+#include "access/relscan.h"
+#include "access/skey.h"
+#include "utils/tqual.h"
+#include "access/tupdesc.h"
+#include "storage/smgr.h"
+#include "utils/rel.h"
+
+/* ----------------------------------------------------------------
+ * heap access method statistics
+ * ----------------------------------------------------------------
+ */
+
/*
 * HeapAccessStatisticsData --
 *	Counters for heap access method calls, kept in two parallel
 *	sets: global_* accumulate since InitHeapAccessStatistics ran,
 *	local_* accumulate since the last ResetHeapAccessStatistics.
 *	Maintained via the IncrHeapAccessStat() macro below.
 */
typedef struct HeapAccessStatisticsData {
 time_t init_global_timestamp; /* time global statistics started */
 time_t local_reset_timestamp; /* last time local reset was done */
 time_t last_request_timestamp; /* last time stats were requested */

 /* counters since statistics were initialized */
 int global_open;
 int global_openr;
 int global_close;
 int global_beginscan;
 int global_rescan;
 int global_endscan;
 int global_getnext;
 int global_fetch;
 int global_insert;
 int global_delete;
 int global_replace;
 int global_markpos;
 int global_restrpos;
 int global_BufferGetRelation;
 int global_RelationIdGetRelation;
 int global_RelationIdGetRelation_Buf;
 int global_RelationNameGetRelation;
 int global_getreldesc;
 int global_heapgettup;
 int global_RelationPutHeapTuple;
 int global_RelationPutLongHeapTuple;

 /* counters since the last local reset */
 int local_open;
 int local_openr;
 int local_close;
 int local_beginscan;
 int local_rescan;
 int local_endscan;
 int local_getnext;
 int local_fetch;
 int local_insert;
 int local_delete;
 int local_replace;
 int local_markpos;
 int local_restrpos;
 int local_BufferGetRelation;
 int local_RelationIdGetRelation;
 int local_RelationIdGetRelation_Buf;
 int local_RelationNameGetRelation;
 int local_getreldesc;
 int local_heapgettup;
 int local_RelationPutHeapTuple;
 int local_RelationPutLongHeapTuple;
} HeapAccessStatisticsData;

typedef HeapAccessStatisticsData *HeapAccessStatistics;

/*
 * IncrHeapAccessStat(x) --
 *	Bump counter field x on the global statistics block; evaluates
 *	to 0 (and increments nothing) when statistics are not
 *	initialized, so call sites need no NULL guard.
 */
#define IncrHeapAccessStat(x) \
    (heap_access_stats == NULL ? 0 : (heap_access_stats->x)++)

extern HeapAccessStatistics heap_access_stats; /* in stats.c */
+
+/* ----------------
+ * function prototypes for heap access method
+ * ----------------
+ */
+/* heap_create, heap_creatr, and heap_destroy are declared in catalog/heap.h */
+#include "catalog/heap.h"
+
+/* heapam.c */
+extern void doinsert(Relation relation, HeapTuple tup);
+extern void SetHeapAccessMethodImmediateInvalidation(bool on);
+
+extern Relation heap_open(Oid relationId);
+extern Relation heap_openr(char *relationName);
+extern void heap_close(Relation relation);
+extern HeapScanDesc heap_beginscan(Relation relation, int atend,
+ TimeQual timeQual, unsigned nkeys, ScanKey key);
+extern void heap_rescan(HeapScanDesc sdesc, bool scanFromEnd, ScanKey key);
+extern void heap_endscan(HeapScanDesc sdesc);
+extern HeapTuple heap_getnext(HeapScanDesc scandesc, int backw, Buffer *b);
+extern HeapTuple heap_fetch(Relation relation, TimeQual timeQual,
+ ItemPointer tid, Buffer *b);
+extern Oid heap_insert(Relation relation, HeapTuple tup);
+extern void heap_delete(Relation relation, ItemPointer tid);
+extern int heap_replace(Relation relation, ItemPointer otid,
+ HeapTuple tup);
+extern void heap_markpos(HeapScanDesc sdesc);
+extern void heap_restrpos(HeapScanDesc sdesc);
+
+/* in common/heaptuple.c */
+extern Size ComputeDataSize(TupleDesc tupleDesc, Datum value[], char nulls[]);
+extern void DataFill(char *data, TupleDesc tupleDesc,
+ Datum value[], char nulls[], char *infomask,
+ bits8 bit[]);
+extern int heap_attisnull(HeapTuple tup, int attnum);
+extern int heap_sysattrlen(AttrNumber attno);
+extern bool heap_sysattrbyval(AttrNumber attno);
+extern char *heap_getsysattr(HeapTuple tup, Buffer b, int attnum);
+extern char *fastgetattr(HeapTuple tup, unsigned attnum,
+ TupleDesc att, bool *isnull);
+extern char *heap_getattr(HeapTuple tup, Buffer b, int attnum,
+ TupleDesc att, bool *isnull);
+extern HeapTuple heap_copytuple(HeapTuple tuple);
+extern void heap_deformtuple(HeapTuple tuple, TupleDesc tdesc,
+ Datum values[], char nulls[]);
+extern HeapTuple heap_formtuple(TupleDesc tupleDescriptor,
+ Datum value[], char nulls[]);
+extern HeapTuple heap_modifytuple(HeapTuple tuple, Buffer buffer,
+ Relation relation, Datum replValue[], char replNull[], char repl[]);
+HeapTuple heap_addheader(uint32 natts, int structlen, char *structure);
+
+/* in common/heap/stats.c */
+extern void InitHeapAccessStatistics(void);
+extern void ResetHeapAccessStatistics(void);
+extern HeapAccessStatistics GetHeapAccessStatistics(void);
+extern void PrintHeapAccessStatistics(HeapAccessStatistics stats);
+extern void PrintAndFreeHeapAccessStatistics(HeapAccessStatistics stats);
+extern void initam(void);
+
+#endif /* HEAPAM_H */
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * hio.h--
+ * POSTGRES heap access method input/output definitions.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id$
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef HIO_H
+#define HIO_H
+
+#include "c.h"
+
+#include "storage/block.h"
+#include "access/htup.h"
+#include "utils/rel.h"
+
/* store a tuple on the page named by blockIndex (implemented in hio.c) */
extern void RelationPutHeapTuple(Relation relation, BlockNumber blockIndex,
 HeapTuple tuple);
/* store a tuple at the end of the relation (no block chosen by caller) */
extern void RelationPutHeapTupleAtEnd(Relation relation, HeapTuple tuple);
+
+#endif /* HIO_H */
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * htup.h--
+ * POSTGRES heap tuple definitions.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id$
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef HTUP_H
+#define HTUP_H
+
+#include "access/attnum.h"
+#include "storage/bufpage.h" /* just to reduce levels of #include */
+#include "storage/itemptr.h"
+#include "utils/nabstime.h"
+
+#define MinHeapTupleBitmapSize 32 /* 8 * 4 */
+
+/* check these, they are likely to be more severely limited by t_hoff */
+
+#define MaxHeapAttributeNumber 1600 /* 8 * 200 */
+
+/*
+ * to avoid wasting space, the attributes should be layed out in such a
+ * way to reduce structure padding.
+ */
/*
 * HeapTupleData --
 *	Header of a heap tuple; user data follows the header at offset
 *	t_hoff (see GETSTRUCT below).  Field order is chosen to reduce
 *	structure padding (see note above).
 */
typedef struct HeapTupleData {

 unsigned int t_len; /* length of entire tuple */

 ItemPointerData t_ctid; /* current TID of this tuple */

 ItemPointerData t_chain; /* replaced tuple TID */

 Oid t_oid; /* OID of this tuple -- 4 bytes */

 CommandId t_cmin; /* insert CID stamp -- 2 bytes each */
 CommandId t_cmax; /* delete CommandId stamp */

 TransactionId t_xmin; /* insert XID stamp -- 4 bytes each */
 TransactionId t_xmax; /* delete XID stamp */

 AbsoluteTime t_tmin; /* time stamps -- 4 bytes each */
 AbsoluteTime t_tmax;

 int16 t_natts; /* number of attributes */
 char t_vtype; /* not used - padding */

 char t_infomask; /* whether tuple has null or variable
 * length attributes (HEAP_HASNULL,
 * HEAP_HASVARLENA below)
 */

 uint8 t_hoff; /* sizeof tuple header */

 bits8 t_bits[MinHeapTupleBitmapSize / 8];
 /* bit map of domains (null bitmap) */

 /* MORE DATA FOLLOWS AT END OF STRUCT */
} HeapTupleData;

typedef HeapTupleData *HeapTuple;
+
+
+#define SelfItemPointerAttributeNumber (-1)
+#define ObjectIdAttributeNumber (-2)
+#define MinTransactionIdAttributeNumber (-3)
+#define MinCommandIdAttributeNumber (-4)
+#define MaxTransactionIdAttributeNumber (-5)
+#define MaxCommandIdAttributeNumber (-6)
+#define ChainItemPointerAttributeNumber (-7)
+#define AnchorItemPointerAttributeNumber (-8)
+#define MinAbsoluteTimeAttributeNumber (-9)
+#define MaxAbsoluteTimeAttributeNumber (-10)
+#define VersionTypeAttributeNumber (-11)
+#define FirstLowInvalidHeapAttributeNumber (-12)
+
+
+/* ----------------
+ * support macros
+ * ----------------
+ */
+#define GETSTRUCT(TUP) (((char *)(TUP)) + ((HeapTuple)(TUP))->t_hoff)
+
+
+/*
+ * BITMAPLEN(NATTS) -
+ * Computes minimum size of bitmap given number of domains.
+ */
+#define BITMAPLEN(NATTS) \
+ ((((((int)(NATTS) - 1) >> 3) + 4 - (MinHeapTupleBitmapSize >> 3)) \
+ & ~03) + (MinHeapTupleBitmapSize >> 3))
+
+/*
+ * HeapTupleIsValid
+ * True iff the heap tuple is valid.
+ */
+#define HeapTupleIsValid(tuple) PointerIsValid(tuple)
+
+/*
+ * information stored in t_infomask:
+ */
+#define HEAP_HASNULL 0x01 /* has null attribute(s) */
+#define HEAP_HASVARLENA 0x02 /* has variable length attribute(s) */
+
+#define HeapTupleNoNulls(tuple) \
+ (!(((HeapTuple) (tuple))->t_infomask & HEAP_HASNULL))
+
+#define HeapTupleAllFixed(tuple) \
+ (!(((HeapTuple) (tuple))->t_infomask & HEAP_HASVARLENA))
+
+#endif /* HTUP_H */
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * ibit.h--
+ * POSTGRES index valid attribute bit map definitions.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id$
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef IBIT_H
+#define IBIT_H
+
+#include "c.h"
+#include "utils/memutils.h"
+
/*
 * IndexAttributeBitMapData --
 *	Fixed-size bit map with one bit per possible index attribute
 *	(MaxIndexAttributeNumber bits, rounded up to whole bytes).
 */
typedef struct IndexAttributeBitMapData {
 char bits[(MaxIndexAttributeNumber + MaxBitsPerByte - 1)
 / MaxBitsPerByte];
} IndexAttributeBitMapData;

typedef IndexAttributeBitMapData *IndexAttributeBitMap;

#define IndexAttributeBitMapSize sizeof(IndexAttributeBitMapData)

/*
 * IndexAttributeBitMapIsValid --
 *	True iff attribute bit map is valid (i.e. a non-NULL pointer).
 */
#define IndexAttributeBitMapIsValid(bits) PointerIsValid(bits)
+
+#endif /* IBIT_H */
--- /dev/null
+#-------------------------------------------------------------------------
+#
+# Makefile.inc--
+# Makefile for access/index
+#
+# Copyright (c) 1994, Regents of the University of California
+#
+#
+# IDENTIFICATION
+# $Header$
+#
+#-------------------------------------------------------------------------
+
+SUBSRCS+= genam.c indexam.c istrat.c
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * genam.c--
+ * general index access method routines
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $Header$
+ *
+ * NOTES
+ * many of the old access method routines have been turned into
+ * macros and moved to genam.h -cim 4/30/91
+ *
+ *-------------------------------------------------------------------------
+ */
+/*
+ * OLD COMMENTS
+ * Scans are implemented as follows:
+ *
+ * `0' represents an invalid item pointer.
+ * `-' represents an unknown item pointer.
+ * `X' represents a known item pointers.
+ * `+' represents known or invalid item pointers.
+ * `*' represents any item pointers.
+ *
+ * State is represented by a triple of these symbols in the order of
+ * previous, current, next. Note that the case of reverse scans works
+ * identically.
+ *
+ * State Result
+ * (1) + + - + 0 0 (if the next item pointer is invalid)
+ * (2) + X - (otherwise)
+ * (3) * 0 0 * 0 0 (no change)
+ * (4) + X 0 X 0 0 (shift)
+ * (5) * + X + X - (shift, add unknown)
+ *
+ * All other states cannot occur.
+ *
 * Note:
 *	It would be possible to cache the status of the previous and
 *	next item pointers using the flags.
+ * ----------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/attnum.h"
+#include "access/genam.h"
+#include "access/heapam.h"
+#include "access/itup.h"
+#include "access/relscan.h"
+#include "access/sdir.h"
+#include "access/skey.h"
+
+#include "storage/bufmgr.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/rel.h"
+
+#include "catalog/catname.h"
+#include "catalog/pg_attribute.h"
+#include "catalog/pg_index.h"
+#include "catalog/pg_proc.h"
+
+#include "catalog/index.h"
+
+/* ----------------------------------------------------------------
+ * general access method routines
+ *
+ * All indexed access methods use an identical scan structure.
+ * We don't know how the various AMs do locking, however, so we don't
+ * do anything about that here.
+ *
+ * The intent is that an AM implementor will define a front-end routine
+ * that calls this one, to fill in the scan, and then does whatever kind
+ * of locking he wants.
+ * ----------------------------------------------------------------
+ */
+
+/* ----------------
+ * RelationGetIndexScan -- Create and fill an IndexScanDesc.
+ *
+ * This routine creates an index scan structure and sets its contents
+ * up correctly. This routine calls AMrescan to set up the scan with
+ * the passed key.
+ *
+ * Parameters:
+ * relation -- index relation for scan.
+ * scanFromEnd -- if true, begin scan at one of the index's
+ * endpoints.
+ * numberOfKeys -- count of scan keys (more than one won't
+ * necessarily do anything useful, yet).
+ * key -- the ScanKey for the starting position of the scan.
+ *
+ * Returns:
+ * An initialized IndexScanDesc.
+ *
+ * Side Effects:
+ * Bumps the ref count on the relation to keep it in the cache.
+ *
+ * ----------------
+ */
+IndexScanDesc
+RelationGetIndexScan(Relation relation,
+ bool scanFromEnd,
+ uint16 numberOfKeys,
+ ScanKey key)
+{
+ IndexScanDesc scan;
+
+ if (! RelationIsValid(relation))
+ elog(WARN, "RelationGetIndexScan: relation invalid");
+
+ scan = (IndexScanDesc) palloc(sizeof(IndexScanDescData));
+
+ scan->relation = relation;
+ scan->opaque = NULL;
+ scan->numberOfKeys = numberOfKeys;
+
+ ItemPointerSetInvalid(&scan->previousItemData);
+ ItemPointerSetInvalid(&scan->currentItemData);
+ ItemPointerSetInvalid(&scan->nextItemData);
+ ItemPointerSetInvalid(&scan->previousMarkData);
+ ItemPointerSetInvalid(&scan->currentMarkData);
+ ItemPointerSetInvalid(&scan->nextMarkData);
+
+ if (numberOfKeys > 0) {
+ scan->keyData = (ScanKey) palloc(sizeof(ScanKeyData) * numberOfKeys);
+ } else {
+ scan->keyData = NULL;
+ }
+
+ index_rescan(scan, scanFromEnd, key);
+
+ return (scan);
+}
+
+/* ----------------
+ * IndexScanRestart -- Restart an index scan.
+ *
+ * This routine isn't used by any existing access method. It's
+ * appropriate if relation level locks are what you want.
+ *
+ * Returns:
+ * None.
+ *
+ * Side Effects:
+ * None.
+ * ----------------
+ */
+void
+IndexScanRestart(IndexScanDesc scan,
+ bool scanFromEnd,
+ ScanKey key)
+{
+ if (! IndexScanIsValid(scan))
+ elog(WARN, "IndexScanRestart: invalid scan");
+
+ ItemPointerSetInvalid(&scan->previousItemData);
+ ItemPointerSetInvalid(&scan->currentItemData);
+ ItemPointerSetInvalid(&scan->nextItemData);
+
+ if (RelationGetNumberOfBlocks(scan->relation) == 0)
+ scan->flags = ScanUnmarked;
+ else if (scanFromEnd)
+ scan->flags = ScanUnmarked | ScanUncheckedPrevious;
+ else
+ scan->flags = ScanUnmarked | ScanUncheckedNext;
+
+ scan->scanFromEnd = (bool) scanFromEnd;
+
+ if (scan->numberOfKeys > 0)
+ memmove(scan->keyData,
+ key,
+ scan->numberOfKeys * sizeof(ScanKeyData));
+}
+
+/* ----------------
+ * IndexScanEnd -- End and index scan.
+ *
+ * This routine is not used by any existing access method, but is
+ * suitable for use if you don't want to do sophisticated locking.
+ *
+ * Returns:
+ * None.
+ *
+ * Side Effects:
+ * None.
+ * ----------------
+ */
+void
+IndexScanEnd(IndexScanDesc scan)
+{
+ if (! IndexScanIsValid(scan))
+ elog(WARN, "IndexScanEnd: invalid scan");
+
+ pfree(scan);
+}
+
+/* ----------------
+ * IndexScanMarkPosition -- Mark current position in a scan.
+ *
+ * This routine isn't used by any existing access method, but is the
+ * one that AM implementors should use, if they don't want to do any
+ * special locking. If relation-level locking is sufficient, this is
+ * the routine for you.
+ *
+ * Returns:
+ * None.
+ *
+ * Side Effects:
+ * None.
+ * ----------------
+ */
+void
+IndexScanMarkPosition(IndexScanDesc scan)
+{
+ RetrieveIndexResult result;
+
+ if (scan->flags & ScanUncheckedPrevious) {
+ result =
+ index_getnext(scan, BackwardScanDirection);
+
+ if (result != NULL) {
+ scan->previousItemData = result->index_iptr;
+ } else {
+ ItemPointerSetInvalid(&scan->previousItemData);
+ }
+
+ } else if (scan->flags & ScanUncheckedNext) {
+ result = (RetrieveIndexResult)
+ index_getnext(scan, ForwardScanDirection);
+
+ if (result != NULL) {
+ scan->nextItemData = result->index_iptr;
+ } else {
+ ItemPointerSetInvalid(&scan->nextItemData);
+ }
+ }
+
+ scan->previousMarkData = scan->previousItemData;
+ scan->currentMarkData = scan->currentItemData;
+ scan->nextMarkData = scan->nextItemData;
+
+ scan->flags = 0x0; /* XXX should have a symbolic name */
+}
+
+/* ----------------
+ * IndexScanRestorePosition -- Restore position on a marked scan.
+ *
+ * This routine isn't used by any existing access method, but is the
+ * one that AM implementors should use if they don't want to do any
+ * special locking. If relation-level locking is sufficient, then
+ * this is the one you want.
+ *
+ * Returns:
+ * None.
+ *
+ * Side Effects:
+ * None.
+ * ----------------
+ */
+void
+IndexScanRestorePosition(IndexScanDesc scan)
+{
+ if (scan->flags & ScanUnmarked)
+ elog(WARN, "IndexScanRestorePosition: no mark to restore");
+
+ scan->previousItemData = scan->previousMarkData;
+ scan->currentItemData = scan->currentMarkData;
+ scan->nextItemData = scan->nextMarkData;
+
+ scan->flags = 0x0; /* XXX should have a symbolic name */
+}
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * indexam.c--
+ * general index access method routines
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $Header$
+ *
+ * INTERFACE ROUTINES
+ * index_open - open an index relation by relationId
+ * index_openr - open a index relation by name
+ * index_close - close a index relation
+ * index_beginscan - start a scan of an index
+ * index_rescan - restart a scan of an index
+ * index_endscan - end a scan
+ * index_insert - insert an index tuple into a relation
+ * index_delete - delete an item from an index relation
+ * index_markpos - mark a scan position
+ * index_restrpos - restore a scan position
+ * index_getnext - get the next tuple from a scan
+ * ** index_fetch - retrieve tuple with tid
+ * ** index_replace - replace a tuple
+ * ** index_getattr - get an attribute from an index tuple
+ * index_getprocid - get a support procedure id from the rel tuple
+ *
+ * IndexScanIsValid - check index scan
+ *
+ * NOTES
+ * This file contains the index_ routines which used
+ * to be a scattered collection of stuff in access/genam.
+ *
+ * The ** routines: index_fetch, index_replace, and index_getattr
+ * have not yet been implemented. They may not be needed.
+ *
+ * old comments
+ * Scans are implemented as follows:
+ *
+ * `0' represents an invalid item pointer.
+ * `-' represents an unknown item pointer.
+ * `X' represents a known item pointers.
+ * `+' represents known or invalid item pointers.
+ * `*' represents any item pointers.
+ *
+ * State is represented by a triple of these symbols in the order of
+ * previous, current, next. Note that the case of reverse scans works
+ * identically.
+ *
+ * State Result
+ * (1) + + - + 0 0 (if the next item pointer is invalid)
+ * (2) + X - (otherwise)
+ * (3) * 0 0 * 0 0 (no change)
+ * (4) + X 0 X 0 0 (shift)
+ * (5) * + X + X - (shift, add unknown)
+ *
+ * All other states cannot occur.
+ *
+ * Note: It would be possible to cache the status of the previous and
+ * next item pointer using the flags.
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/attnum.h"
+#include "access/genam.h"
+#include "access/heapam.h"
+#include "access/itup.h"
+#include "access/relscan.h"
+#include "access/sdir.h"
+#include "access/skey.h"
+#include "access/funcindex.h"
+
+#include "storage/lmgr.h"
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/rel.h"
+#include "utils/relcache.h"
+
+#include "catalog/catname.h"
+#include "catalog/pg_attribute.h"
+#include "catalog/pg_index.h"
+#include "catalog/pg_proc.h"
+
+#include "catalog/index.h"
+
+#include "fmgr.h"
+
+/* ----------------
+ * undefine macros we aren't going to use that would otherwise
+ * get in our way.. delete is defined in c.h and the am's are
+ * defined in heapam.h
+ * ----------------
+ */
+#undef delete
+#undef aminsert
+#undef amdelete
+#undef ambeginscan
+#undef amrescan
+#undef amendscan
+#undef ammarkpos
+#undef amrestrpos
+#undef amgettuple
+
+/* ----------------------------------------------------------------
+ * macros used in index_ routines
+ * ----------------------------------------------------------------
+ */
/* sanity Asserts on a `relation' variable in the calling scope */
#define RELATION_CHECKS \
Assert(RelationIsValid(relation)); \
	Assert(PointerIsValid(relation->rd_am))

/* sanity Asserts on a `scan' variable in the calling scope */
#define SCAN_CHECKS \
 Assert(IndexScanIsValid(scan)); \
 Assert(RelationIsValid(scan->relation)); \
 Assert(PointerIsValid(scan->relation->rd_am))

/*
 * Fetch the access method procedure field y (e.g. aminsert) of
 * `relation' into the caller's local variable, which MUST be named
 * `procedure'; elog(WARN)s if the regproc is invalid.  x is only used
 * to build the error message.
 */
#define GET_REL_PROCEDURE(x,y) \
 CppConcat(procedure = relation->rd_am->,y); \
 if (! RegProcedureIsValid(procedure)) \
 elog(WARN, "index_%s: invalid %s regproc", \
 CppAsString(x), CppAsString(y))

/* as GET_REL_PROCEDURE, but reaches the am through `scan->relation' */
#define GET_SCAN_PROCEDURE(x,y) \
 CppConcat(procedure = scan->relation->rd_am->,y); \
 if (! RegProcedureIsValid(procedure)) \
 elog(WARN, "index_%s: invalid %s regproc", \
 CppAsString(x), CppAsString(y))
+
+
+/* ----------------------------------------------------------------
+ * index_ interface functions
+ * ----------------------------------------------------------------
+ */
+/* ----------------
+ * index_open - open an index relation by relationId
+ *
+ * presently the relcache routines do all the work we need
+ * to open/close index relations.
+ * ----------------
+ */
+Relation
+index_open(Oid relationId)
+{
+ return RelationIdGetRelation(relationId);
+}
+
+/* ----------------
+ * index_openr - open a index relation by name
+ *
+ * presently the relcache routines do all the work we need
+ * to open/close index relations.
+ * ----------------
+ */
+Relation
+index_openr(char *relationName)
+{
+ return RelationNameGetRelation(relationName);
+}
+
+/* ----------------
+ * index_close - close a index relation
+ *
+ * presently the relcache routines do all the work we need
+ * to open/close index relations.
+ * ----------------
+ */
+void
+index_close(Relation relation)
+{
+ (void) RelationClose(relation);
+}
+
+/* ----------------
+ * index_insert - insert an index tuple into a relation
+ * ----------------
+ */
+InsertIndexResult
+index_insert(Relation relation,
+ IndexTuple indexTuple)
+{
+ RegProcedure procedure;
+ InsertIndexResult specificResult;
+
+ RELATION_CHECKS;
+ GET_REL_PROCEDURE(insert,aminsert);
+
+ /* ----------------
+ * have the am's insert proc do all the work.
+ * ----------------
+ */
+ specificResult = (InsertIndexResult)
+ fmgr(procedure, relation, indexTuple, NULL);
+
+ /* ----------------
+ * the insert proc is supposed to return a "specific result" and
+ * this routine has to return a "general result" so after we get
+ * something back from the insert proc, we allocate a
+ * "general result" and copy some crap between the two.
+ *
+ * As far as I'm concerned all this result shit is needlessly c
+ * omplicated and should be eliminated. -cim 1/19/91
+ *
+ * mao concurs. regardless of how we feel here, however, it is
+ * important to free memory we don't intend to return to anyone.
+ * 2/28/91
+ *
+ * this "general result" crap is now gone. -ay 3/6/95
+ * ----------------
+ */
+
+ return (specificResult);
+}
+
+/* ----------------
+ * index_delete - delete an item from an index relation
+ * ----------------
+ */
+void
+index_delete(Relation relation, ItemPointer indexItem)
+{
+ RegProcedure procedure;
+
+ RELATION_CHECKS;
+ GET_REL_PROCEDURE(delete,amdelete);
+
+ (void) fmgr(procedure, relation, indexItem);
+}
+
+/* ----------------
+ * index_beginscan - start a scan of an index
+ * ----------------
+ */
+IndexScanDesc
+index_beginscan(Relation relation,
+ bool scanFromEnd,
+ uint16 numberOfKeys,
+ ScanKey key)
+{
+ IndexScanDesc scandesc;
+ RegProcedure procedure;
+
+ RELATION_CHECKS;
+ GET_REL_PROCEDURE(beginscan,ambeginscan);
+
+ RelationSetRIntentLock(relation);
+
+ scandesc = (IndexScanDesc)
+ fmgr(procedure, relation, scanFromEnd, numberOfKeys, key);
+
+ return scandesc;
+}
+
+/* ----------------
+ * index_rescan - restart a scan of an index
+ * ----------------
+ */
+void
+index_rescan(IndexScanDesc scan, bool scanFromEnd, ScanKey key)
+{
+ RegProcedure procedure;
+
+ SCAN_CHECKS;
+ GET_SCAN_PROCEDURE(rescan,amrescan);
+
+ (void) fmgr(procedure, scan, scanFromEnd, key);
+}
+
+/* ----------------
+ * index_endscan - end a scan
+ * ----------------
+ */
+void
+index_endscan(IndexScanDesc scan)
+{
+ RegProcedure procedure;
+
+ SCAN_CHECKS;
+ GET_SCAN_PROCEDURE(endscan,amendscan);
+
+ (void) fmgr(procedure, scan);
+
+ RelationUnsetRIntentLock(scan->relation);
+}
+
+/* ----------------
+ * index_markpos - mark a scan position
+ * ----------------
+ */
+void
+index_markpos(IndexScanDesc scan)
+{
+ RegProcedure procedure;
+
+ SCAN_CHECKS;
+ GET_SCAN_PROCEDURE(markpos,ammarkpos);
+
+ (void) fmgr(procedure, scan);
+}
+
+/* ----------------
+ * index_restrpos - restore a scan position
+ * ----------------
+ */
+void
+index_restrpos(IndexScanDesc scan)
+{
+ RegProcedure procedure;
+
+ SCAN_CHECKS;
+ GET_SCAN_PROCEDURE(restrpos,amrestrpos);
+
+ (void) fmgr(procedure, scan);
+}
+
+/* ----------------
+ * index_getnext - get the next tuple from a scan
+ *
+ * A RetrieveIndexResult is a index tuple/heap tuple pair
+ * ----------------
+ */
+RetrieveIndexResult
+index_getnext(IndexScanDesc scan,
+ ScanDirection direction)
+{
+ RegProcedure procedure;
+ RetrieveIndexResult result;
+
+ SCAN_CHECKS;
+ GET_SCAN_PROCEDURE(getnext,amgettuple);
+
+ /* ----------------
+ * have the am's gettuple proc do all the work.
+ * ----------------
+ */
+ result = (RetrieveIndexResult)
+ fmgr(procedure, scan, direction);
+
+ return result;
+}
+
+/* ----------------
+ * index_getprocid
+ *
+ * Some indexed access methods may require support routines that are
+ * not in the operator class/operator model imposed by pg_am. These
+ * access methods may store the OIDs of registered procedures they
+ * need in pg_amproc. These registered procedure OIDs are ordered in
+ * a way that makes sense to the access method, and used only by the
+ * access method. The general index code doesn't know anything about
+ * the routines involved; it just builds an ordered list of them for
+ * each attribute on which an index is defined.
+ *
+ * This routine returns the requested procedure OID for a particular
+ * indexed attribute.
+ * ----------------
+ */
+RegProcedure
+index_getprocid(Relation irel,
+ AttrNumber attnum,
+ uint16 procnum)
+{
+ RegProcedure *loc;
+ int natts;
+
+ natts = irel->rd_rel->relnatts;
+
+ loc = irel->rd_support;
+
+ Assert(loc != NULL);
+
+ return (loc[(natts * (procnum - 1)) + (attnum - 1)]);
+}
+
/*
 * GetIndexValue --
 *	Returns the datum to be stored in an index for a given heap tuple.
 *
 *	If 'fInfo' is valid and names a registered procedure (i.e. this is
 *	a functional index), the indexed value is computed by applying that
 *	function to the heap attributes listed in 'attrNums'; otherwise the
 *	value of attribute attrNums[attOff] is fetched directly with
 *	heap_getattr.
 *
 *	In the plain-attribute case, *attNull is set by heap_getattr.
 *	NOTE(review): in the functional case the 'isNull' flag returned by
 *	fmgr_array_args is discarded and *attNull is unconditionally forced
 *	to FALSE, so a null function result is never reported -- confirm
 *	this is intended.
 */
Datum
GetIndexValue(HeapTuple tuple,
	      TupleDesc hTupDesc,
	      int attOff,
	      AttrNumber attrNums[],
	      FuncIndexInfo *fInfo,
	      bool *attNull,
	      Buffer buffer)
{
    Datum returnVal;
    bool isNull;

    if (PointerIsValid(fInfo) && FIgetProcOid(fInfo) != InvalidOid) {
	int i;
	/* gather the function's arguments from the heap tuple */
	Datum *attData = (Datum *)palloc(FIgetnArgs(fInfo)*sizeof(Datum));

	for (i = 0; i < FIgetnArgs(fInfo); i++) {
	    attData[i] = (Datum) heap_getattr(tuple,
					      buffer,
					      attrNums[i],
					      hTupDesc,
					      attNull);
	}
	returnVal = (Datum)fmgr_array_args(FIgetProcOid(fInfo),
					   FIgetnArgs(fInfo),
					   (char **) attData,
					   &isNull);
	pfree(attData);
	*attNull = FALSE;	/* see NOTE(review) above: isNull ignored */
    }else {
	/* not a functional index: fetch the attribute value directly */
	returnVal = (Datum) heap_getattr(tuple, buffer, attrNums[attOff],
					 hTupDesc, attNull);
    }
    return returnVal;
}
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * istrat.c--
+ * index scan strategy manipulation code and index strategy manipulation
+ * operator code.
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $Header$
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/attnum.h"
+#include "access/heapam.h"
+#include "access/istrat.h"
+#include "access/itup.h" /* for MaxIndexAttributeNumber */
+#include "access/skey.h"
+#include "utils/tqual.h" /* for NowTimeQual */
+
+#include "fmgr.h"
+#include "utils/elog.h"
+#include "utils/rel.h"
+
+#include "catalog/catname.h"
+#include "catalog/pg_amop.h"
+#include "catalog/pg_amproc.h"
+#include "catalog/pg_index.h"
+#include "catalog/pg_proc.h"
+
+/* ----------------------------------------------------------------
+ * misc strategy support routines
+ * ----------------------------------------------------------------
+ */
+
+/*
+ * StrategyNumberIsValid
+ * StrategyNumberIsInBounds
+ * StrategyMapIsValid
+ * StrategyTransformMapIsValid
+ * IndexStrategyIsValid
+ *
+ * ... are now macros in istrat.h -cim 4/27/91
+ */
+
+/*
+ * StrategyMapGetScanKeyEntry --
+ * Returns a scan key entry of a index strategy mapping member.
+ *
+ * Note:
+ * Assumes that the index strategy mapping is valid.
+ * Assumes that the index strategy number is valid.
+ * Bounds checking should be done outside this routine.
+ */
+ScanKey
+StrategyMapGetScanKeyEntry(StrategyMap map,
+ StrategyNumber strategyNumber)
+{
+ Assert(StrategyMapIsValid(map));
+ Assert(StrategyNumberIsValid(strategyNumber));
+ return (&map->entry[strategyNumber - 1]);
+}
+
+/*
+ * IndexStrategyGetStrategyMap --
+ * Returns an index strategy mapping of an index strategy.
+ *
+ * Note:
+ * Assumes that the index strategy is valid.
+ * Assumes that the number of index strategies is valid.
+ * Bounds checking should be done outside this routine.
+ */
+StrategyMap
+IndexStrategyGetStrategyMap(IndexStrategy indexStrategy,
+ StrategyNumber maxStrategyNum,
+ AttrNumber attrNum)
+{
+ Assert(IndexStrategyIsValid(indexStrategy));
+ Assert(StrategyNumberIsValid(maxStrategyNum));
+ Assert(AttributeNumberIsValid(attrNum));
+
+ maxStrategyNum = AMStrategies(maxStrategyNum); /* XXX */
+ return
+ &indexStrategy->strategyMapData[maxStrategyNum * (attrNum - 1)];
+}
+
+/*
+ * AttributeNumberGetIndexStrategySize --
+ * Computes the size of an index strategy.
+ */
+Size
+AttributeNumberGetIndexStrategySize(AttrNumber maxAttributeNumber,
+ StrategyNumber maxStrategyNumber)
+{
+ maxStrategyNumber = AMStrategies(maxStrategyNumber); /* XXX */
+ return
+ maxAttributeNumber * maxStrategyNumber * sizeof (ScanKeyData);
+}
+
+/*
+ * StrategyTransformMapIsValid is now a macro in istrat.h -cim 4/27/91
+ */
+
+/* ----------------
+ * StrategyOperatorIsValid
+ * ----------------
+ */
+bool
+StrategyOperatorIsValid(StrategyOperator operator,
+ StrategyNumber maxStrategy)
+{
+ return (bool)
+ (PointerIsValid(operator) &&
+ StrategyNumberIsInBounds(operator->strategy, maxStrategy) &&
+ !(operator->flags & ~(SK_NEGATE | SK_COMMUTE)));
+}
+
+/* ----------------
+ * StrategyTermIsValid
+ * ----------------
+ */
+bool
+StrategyTermIsValid(StrategyTerm term,
+ StrategyNumber maxStrategy)
+{
+ Index index;
+
+ if (! PointerIsValid(term) || term->degree == 0)
+ return false;
+
+ for (index = 0; index < term->degree; index += 1) {
+ if (! StrategyOperatorIsValid(&term->operatorData[index],
+ maxStrategy)) {
+
+ return false;
+ }
+ }
+
+ return true;
+}
+
+/* ----------------
+ * StrategyExpressionIsValid
+ * ----------------
+ */
+bool
+StrategyExpressionIsValid(StrategyExpression expression,
+ StrategyNumber maxStrategy)
+{
+ StrategyTerm *termP;
+
+ if (!PointerIsValid(expression))
+ return true;
+
+ if (!StrategyTermIsValid(expression->term[0], maxStrategy))
+ return false;
+
+ termP = &expression->term[1];
+ while (StrategyTermIsValid(*termP, maxStrategy))
+ termP += 1;
+
+ return (bool)
+ (! PointerIsValid(*termP));
+}
+
+/* ----------------
+ * StrategyEvaluationIsValid
+ * ----------------
+ */
+bool
+StrategyEvaluationIsValid(StrategyEvaluation evaluation)
+{
+ Index index;
+
+ if (! PointerIsValid(evaluation) ||
+ ! StrategyNumberIsValid(evaluation->maxStrategy) ||
+ ! StrategyTransformMapIsValid(evaluation->negateTransform) ||
+ ! StrategyTransformMapIsValid(evaluation->commuteTransform) ||
+ ! StrategyTransformMapIsValid(evaluation->negateCommuteTransform)) {
+
+ return false;
+ }
+
+ for (index = 0; index < evaluation->maxStrategy; index += 1) {
+ if (! StrategyExpressionIsValid(evaluation->expression[index],
+ evaluation->maxStrategy)) {
+
+ return false;
+ }
+ }
+ return true;
+}
+
+/* ----------------
+ * StrategyTermEvaluate
+ * ----------------
+ */
+static bool
+StrategyTermEvaluate(StrategyTerm term,
+ StrategyMap map,
+ Datum left,
+ Datum right)
+{
+ Index index;
+ long tmpres;
+ bool result;
+ StrategyOperator operator;
+ ScanKey entry;
+
+ for (index = 0, operator = &term->operatorData[0];
+ index < term->degree; index += 1, operator += 1) {
+
+ entry = &map->entry[operator->strategy - 1];
+
+ Assert(RegProcedureIsValid(entry->sk_procedure));
+
+ switch (operator->flags ^ entry->sk_flags) {
+ case 0x0:
+ tmpres = (long) FMGR_PTR2(entry->sk_func, entry->sk_procedure,
+ left, right);
+ break;
+
+ case SK_NEGATE:
+ tmpres = (long) !FMGR_PTR2(entry->sk_func, entry->sk_procedure,
+ left, right);
+ break;
+
+ case SK_COMMUTE:
+ tmpres = (long) FMGR_PTR2(entry->sk_func, entry->sk_procedure,
+ right, left);
+ break;
+
+ case SK_NEGATE | SK_COMMUTE:
+ tmpres = (long) !FMGR_PTR2(entry->sk_func, entry->sk_procedure,
+ right, left);
+ break;
+
+ default:
+ elog(FATAL, "StrategyTermEvaluate: impossible case %d",
+ operator->flags ^ entry->sk_flags);
+ }
+
+ result = (bool) tmpres;
+ if (!result)
+ return result;
+ }
+
+ return result;
+}
+
+
+/* ----------------
+ * RelationGetStrategy
+ * ----------------
+ */
+StrategyNumber
+RelationGetStrategy(Relation relation,
+ AttrNumber attributeNumber,
+ StrategyEvaluation evaluation,
+ RegProcedure procedure)
+{
+ StrategyNumber strategy;
+ StrategyMap strategyMap;
+ ScanKey entry;
+ Index index;
+ int numattrs;
+
+ Assert(RelationIsValid(relation));
+ numattrs = RelationGetNumberOfAttributes(relation);
+
+ Assert(relation->rd_rel->relkind == RELKIND_INDEX); /* XXX use accessor */
+ Assert(AttributeNumberIsValid(attributeNumber));
+ Assert( (attributeNumber >= 1) && (attributeNumber < 1 + numattrs));
+
+ Assert(StrategyEvaluationIsValid(evaluation));
+ Assert(RegProcedureIsValid(procedure));
+
+ strategyMap =
+ IndexStrategyGetStrategyMap(RelationGetIndexStrategy(relation),
+ evaluation->maxStrategy,
+ attributeNumber);
+
+ /* get a strategy number for the procedure ignoring flags for now */
+ for (index = 0; index < evaluation->maxStrategy; index += 1) {
+ if (strategyMap->entry[index].sk_procedure == procedure) {
+ break;
+ }
+ }
+
+ if (index == evaluation->maxStrategy)
+ return InvalidStrategy;
+
+ strategy = 1 + index;
+ entry = StrategyMapGetScanKeyEntry(strategyMap, strategy);
+
+ Assert(!(entry->sk_flags & ~(SK_NEGATE | SK_COMMUTE)));
+
+ switch (entry->sk_flags & (SK_NEGATE | SK_COMMUTE)) {
+ case 0x0:
+ return strategy;
+
+ case SK_NEGATE:
+ strategy = evaluation->negateTransform->strategy[strategy - 1];
+ break;
+
+ case SK_COMMUTE:
+ strategy = evaluation->commuteTransform->strategy[strategy - 1];
+ break;
+
+ case SK_NEGATE | SK_COMMUTE:
+ strategy = evaluation->negateCommuteTransform->strategy[strategy - 1];
+ break;
+
+ default:
+ elog(FATAL, "RelationGetStrategy: impossible case %d", entry->sk_flags);
+ }
+
+
+ if (! StrategyNumberIsInBounds(strategy, evaluation->maxStrategy)) {
+ if (! StrategyNumberIsValid(strategy)) {
+ elog(WARN, "RelationGetStrategy: corrupted evaluation");
+ }
+ }
+
+ return strategy;
+}
+
+/* ----------------
+ * RelationInvokeStrategy
+ * ----------------
+ */
+bool /* XXX someday, this may return Datum */
+RelationInvokeStrategy(Relation relation,
+ StrategyEvaluation evaluation,
+ AttrNumber attributeNumber,
+ StrategyNumber strategy,
+ Datum left,
+ Datum right)
+{
+ StrategyNumber newStrategy;
+ StrategyMap strategyMap;
+ ScanKey entry;
+ StrategyTermData termData;
+ int numattrs;
+
+ Assert(RelationIsValid(relation));
+ Assert(relation->rd_rel->relkind == RELKIND_INDEX); /* XXX use accessor */
+ numattrs = RelationGetNumberOfAttributes(relation);
+
+ Assert(StrategyEvaluationIsValid(evaluation));
+ Assert(AttributeNumberIsValid(attributeNumber));
+ Assert( (attributeNumber >= 1) && (attributeNumber < 1 + numattrs));
+
+ Assert(StrategyNumberIsInBounds(strategy, evaluation->maxStrategy));
+
+ termData.degree = 1;
+
+ strategyMap =
+ IndexStrategyGetStrategyMap(RelationGetIndexStrategy(relation),
+ evaluation->maxStrategy,
+ attributeNumber);
+
+ entry = StrategyMapGetScanKeyEntry(strategyMap, strategy);
+
+ if (RegProcedureIsValid(entry->sk_procedure)) {
+ termData.operatorData[0].strategy = strategy;
+ termData.operatorData[0].flags = 0x0;
+
+ return
+ StrategyTermEvaluate(&termData, strategyMap, left, right);
+ }
+
+
+ newStrategy = evaluation->negateTransform->strategy[strategy - 1];
+ if (newStrategy != strategy && StrategyNumberIsValid(newStrategy)) {
+
+ entry = StrategyMapGetScanKeyEntry(strategyMap, newStrategy);
+
+ if (RegProcedureIsValid(entry->sk_procedure)) {
+ termData.operatorData[0].strategy = newStrategy;
+ termData.operatorData[0].flags = SK_NEGATE;
+
+ return
+ StrategyTermEvaluate(&termData, strategyMap, left, right);
+ }
+ }
+
+ newStrategy = evaluation->commuteTransform->strategy[strategy - 1];
+ if (newStrategy != strategy && StrategyNumberIsValid(newStrategy)) {
+
+ entry = StrategyMapGetScanKeyEntry(strategyMap, newStrategy);
+
+ if (RegProcedureIsValid(entry->sk_procedure)) {
+ termData.operatorData[0].strategy = newStrategy;
+ termData.operatorData[0].flags = SK_COMMUTE;
+
+ return
+ StrategyTermEvaluate(&termData, strategyMap, left, right);
+ }
+ }
+
+ newStrategy = evaluation->negateCommuteTransform->strategy[strategy - 1];
+ if (newStrategy != strategy && StrategyNumberIsValid(newStrategy)) {
+
+ entry = StrategyMapGetScanKeyEntry(strategyMap, newStrategy);
+
+ if (RegProcedureIsValid(entry->sk_procedure)) {
+ termData.operatorData[0].strategy = newStrategy;
+ termData.operatorData[0].flags = SK_NEGATE | SK_COMMUTE;
+
+ return
+ StrategyTermEvaluate(&termData, strategyMap, left, right);
+ }
+ }
+
+ if (PointerIsValid(evaluation->expression[strategy - 1])) {
+ StrategyTerm *termP;
+
+ termP = &evaluation->expression[strategy - 1]->term[0];
+ while (PointerIsValid(*termP)) {
+ Index index;
+
+ for (index = 0; index < (*termP)->degree; index += 1) {
+ entry = StrategyMapGetScanKeyEntry(strategyMap,
+ (*termP)->operatorData[index].strategy);
+
+ if (! RegProcedureIsValid(entry->sk_procedure)) {
+ break;
+ }
+ }
+
+ if (index == (*termP)->degree) {
+ return
+ StrategyTermEvaluate(*termP, strategyMap, left, right);
+ }
+
+ termP += 1;
+ }
+ }
+
+ elog(WARN, "RelationInvokeStrategy: cannot evaluate strategy %d",
+ strategy);
+
+ /* not reached, just to make compiler happy */
+ return FALSE;
+
+
+}
+
+/* ----------------
+ * OperatorRelationFillScanKeyEntry
+ * ----------------
+ */
+static void
+OperatorRelationFillScanKeyEntry(Relation operatorRelation,
+ Oid operatorObjectId,
+ ScanKey entry)
+{
+ HeapScanDesc scan;
+ ScanKeyData scanKeyData;
+ HeapTuple tuple;
+
+ ScanKeyEntryInitialize(&scanKeyData, 0,
+ ObjectIdAttributeNumber,
+ ObjectIdEqualRegProcedure,
+ ObjectIdGetDatum(operatorObjectId));
+
+ scan = heap_beginscan(operatorRelation, false, NowTimeQual,
+ 1, &scanKeyData);
+
+ tuple = heap_getnext(scan, false, (Buffer *)NULL);
+ if (! HeapTupleIsValid(tuple)) {
+ elog(WARN, "OperatorObjectIdFillScanKeyEntry: unknown operator %lu",
+ (uint32) operatorObjectId);
+ }
+
+ entry->sk_flags = 0;
+ entry->sk_procedure =
+ ((OperatorTupleForm) GETSTRUCT(tuple))->oprcode;
+ fmgr_info(entry->sk_procedure, &entry->sk_func, &entry->sk_nargs);
+
+ if (! RegProcedureIsValid(entry->sk_procedure)) {
+ elog(WARN,
+ "OperatorObjectIdFillScanKeyEntry: no procedure for operator %lu",
+ (uint32) operatorObjectId);
+ }
+
+ heap_endscan(scan);
+}
+
+
+/*
+ * IndexSupportInitialize --
+ * Initializes an index strategy and associated support procedures.
+ */
+void
+IndexSupportInitialize(IndexStrategy indexStrategy,
+ RegProcedure *indexSupport,
+ Oid indexObjectId,
+ Oid accessMethodObjectId,
+ StrategyNumber maxStrategyNumber,
+ StrategyNumber maxSupportNumber,
+ AttrNumber maxAttributeNumber)
+{
+ Relation relation;
+ Relation operatorRelation;
+ HeapScanDesc scan;
+ HeapTuple tuple;
+ ScanKeyData entry[2];
+ StrategyMap map;
+ AttrNumber attributeNumber;
+ int attributeIndex;
+ Oid operatorClassObjectId[ MaxIndexAttributeNumber ];
+
+ maxStrategyNumber = AMStrategies(maxStrategyNumber);
+
+ ScanKeyEntryInitialize(&entry[0], 0, Anum_pg_index_indexrelid,
+ ObjectIdEqualRegProcedure,
+ ObjectIdGetDatum(indexObjectId));
+
+ relation = heap_openr(IndexRelationName);
+ scan = heap_beginscan(relation, false, NowTimeQual, 1, entry);
+ tuple = heap_getnext(scan, 0, (Buffer *)NULL);
+ if (! HeapTupleIsValid(tuple))
+ elog(WARN, "IndexSupportInitialize: corrupted catalogs");
+
+ /*
+ * XXX note that the following assumes the INDEX tuple is well formed and
+ * that the key[] and class[] are 0 terminated.
+ */
+ for (attributeIndex=0; attributeIndex<maxAttributeNumber; attributeIndex++)
+ {
+ IndexTupleForm iform;
+
+ iform = (IndexTupleForm) GETSTRUCT(tuple);
+
+ if (!OidIsValid(iform->indkey[attributeIndex])) {
+ if (attributeIndex == 0) {
+ elog(WARN, "IndexSupportInitialize: no pg_index tuple");
+ }
+ break;
+ }
+
+ operatorClassObjectId[attributeIndex]
+ = iform->indclass[attributeIndex];
+ }
+
+ heap_endscan(scan);
+ heap_close(relation);
+
+ /* if support routines exist for this access method, load them */
+ if (maxSupportNumber > 0) {
+
+ ScanKeyEntryInitialize(&entry[0], 0, Anum_pg_amproc_amid,
+ ObjectIdEqualRegProcedure,
+ ObjectIdGetDatum(accessMethodObjectId));
+
+ ScanKeyEntryInitialize(&entry[1], 0, Anum_pg_amproc_amopclaid,
+ ObjectIdEqualRegProcedure, 0);
+
+/* relation = heap_openr(Name_pg_amproc); */
+ relation = heap_openr(AccessMethodProcedureRelationName);
+
+
+ for (attributeNumber = maxAttributeNumber; attributeNumber > 0;
+ attributeNumber--) {
+
+ int16 support;
+ Form_pg_amproc form;
+ RegProcedure *loc;
+
+ loc = &indexSupport[((attributeNumber - 1) * maxSupportNumber)];
+
+ for (support = maxSupportNumber; --support >= 0; ) {
+ loc[support] = InvalidOid;
+ }
+
+ entry[1].sk_argument =
+ ObjectIdGetDatum(operatorClassObjectId[attributeNumber - 1]);
+
+ scan = heap_beginscan(relation, false, NowTimeQual, 2, entry);
+
+ while (tuple = heap_getnext(scan, 0, (Buffer *)NULL),
+ HeapTupleIsValid(tuple)) {
+
+ form = (Form_pg_amproc) GETSTRUCT(tuple);
+ loc[(form->amprocnum - 1)] = form->amproc;
+ }
+
+ heap_endscan(scan);
+ }
+ heap_close(relation);
+ }
+
+ ScanKeyEntryInitialize(&entry[0], 0,
+ Anum_pg_amop_amopid,
+ ObjectIdEqualRegProcedure,
+ ObjectIdGetDatum(accessMethodObjectId));
+
+ ScanKeyEntryInitialize(&entry[1], 0,
+ Anum_pg_amop_amopclaid,
+ ObjectIdEqualRegProcedure, 0);
+
+ relation = heap_openr(AccessMethodOperatorRelationName);
+ operatorRelation = heap_openr(OperatorRelationName);
+
+ for (attributeNumber = maxAttributeNumber; attributeNumber > 0;
+ attributeNumber--) {
+
+ StrategyNumber strategy;
+
+ entry[1].sk_argument =
+ ObjectIdGetDatum(operatorClassObjectId[attributeNumber - 1]);
+
+ map = IndexStrategyGetStrategyMap(indexStrategy,
+ maxStrategyNumber,
+ attributeNumber);
+
+ for (strategy = 1; strategy <= maxStrategyNumber; strategy++)
+ ScanKeyEntrySetIllegal(StrategyMapGetScanKeyEntry(map, strategy));
+
+ scan = heap_beginscan(relation, false, NowTimeQual, 2, entry);
+
+ while (tuple = heap_getnext(scan, 0, (Buffer *)NULL),
+ HeapTupleIsValid(tuple)) {
+ Form_pg_amop form;
+
+ form = (Form_pg_amop) GETSTRUCT(tuple);
+
+ OperatorRelationFillScanKeyEntry(operatorRelation,
+ form->amopopr,
+ StrategyMapGetScanKeyEntry(map, form->amopstrategy));
+ }
+
+ heap_endscan(scan);
+ }
+
+ heap_close(operatorRelation);
+ heap_close(relation);
+}
+
+/* ----------------
+ * IndexStrategyDisplay
+ * ----------------
+ */
+#ifdef ISTRATDEBUG
+int
+IndexStrategyDisplay(IndexStrategy indexStrategy,
+ StrategyNumber numberOfStrategies,
+ int numberOfAttributes)
+{
+ StrategyMap strategyMap;
+ AttrNumber attributeNumber;
+ StrategyNumber strategyNumber;
+
+ for (attributeNumber = 1; attributeNumber <= numberOfAttributes;
+ attributeNumber += 1) {
+
+ strategyMap = IndexStrategyGetStrategyMap(indexStrategy,
+ numberOfStrategies,
+ attributeNumber);
+
+ for (strategyNumber = 1;
+ strategyNumber <= AMStrategies(numberOfStrategies);
+ strategyNumber += 1) {
+
+ printf(":att %d\t:str %d\t:opr 0x%x(%d)\n",
+ attributeNumber, strategyNumber,
+ strategyMap->entry[strategyNumber - 1].sk_procedure,
+ strategyMap->entry[strategyNumber - 1].sk_procedure);
+ }
+ }
+}
+#endif /* defined(ISTRATDEBUG) */
+
+
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * iqual.h--
+ * Index scan key qualification definitions.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id$
+ *
+ *-------------------------------------------------------------------------
+ */
#ifndef IQUAL_H
#define IQUAL_H

#include "c.h"

#include "storage/itemid.h"
#include "utils/rel.h"
#include "access/skey.h"

/* ----------------
 *	index tuple qualification support
 * ----------------
 */

/* counter of index tuples processed -- presumably a statistics counter
 * maintained by the implementation in iqual.c; verify there */
extern int NIndexTupleProcessed;

/* tests an index tuple against 'scanKeySize' scan keys; implementation
 * lives in iqual.c */
extern bool index_keytest(IndexTuple tuple, TupleDesc tupdesc,
			  int scanKeySize, ScanKey key);

#endif /* IQUAL_H */
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * istrat.h--
+ * POSTGRES index strategy definitions.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id$
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef ISTRAT_H
+#define ISTRAT_H
+
+#include "postgres.h"
+#include "access/attnum.h"
+#include "access/skey.h"
+#include "access/strat.h"
+#include "utils/rel.h" /* for Relation */
+
+/*
+ * StrategyNumberIsValid --
+ * True iff the strategy number is valid.
+ */
+#define StrategyNumberIsValid(strategyNumber) \
+ ((bool) ((strategyNumber) != InvalidStrategy))
+
+/*
+ * StrategyNumberIsInBounds --
+ * True iff strategy number is within given bounds.
+ *
+ * Note:
+ * Assumes StrategyNumber is an unsigned type.
+ * Assumes the bounded interval to be (0,max].
+ */
+#define StrategyNumberIsInBounds(strategyNumber, maxStrategyNumber) \
+ ((bool)(InvalidStrategy < (strategyNumber) && \
+ (strategyNumber) <= (maxStrategyNumber)))
+
+/*
+ * StrategyMapIsValid --
+ * True iff the index strategy mapping is valid.
+ */
+#define StrategyMapIsValid(map) PointerIsValid(map)
+
+/*
+ * IndexStrategyIsValid --
+ * True iff the index strategy is valid.
+ */
+#define IndexStrategyIsValid(s) PointerIsValid(s)
+
+extern ScanKey StrategyMapGetScanKeyEntry(StrategyMap map,
+ StrategyNumber strategyNumber);
+extern StrategyMap IndexStrategyGetStrategyMap(IndexStrategy indexStrategy,
+ StrategyNumber maxStrategyNum, AttrNumber attrNum);
+
+extern Size
+AttributeNumberGetIndexStrategySize(AttrNumber maxAttributeNumber,
+ StrategyNumber maxStrategyNumber);
+extern bool StrategyOperatorIsValid(StrategyOperator operator,
+ StrategyNumber maxStrategy);
+extern bool StrategyTermIsValid(StrategyTerm term,
+ StrategyNumber maxStrategy);
+extern bool StrategyExpressionIsValid(StrategyExpression expression,
+ StrategyNumber maxStrategy);
+extern bool StrategyEvaluationIsValid(StrategyEvaluation evaluation);
+extern StrategyNumber RelationGetStrategy(Relation relation,
+ AttrNumber attributeNumber, StrategyEvaluation evaluation,
+ RegProcedure procedure);
+extern bool RelationInvokeStrategy(Relation relation,
+ StrategyEvaluation evaluation, AttrNumber attributeNumber,
+ StrategyNumber strategy, Datum left, Datum right);
+extern void IndexSupportInitialize(IndexStrategy indexStrategy,
+ RegProcedure *indexSupport, Oid indexObjectId,
+ Oid accessMethodObjectId, StrategyNumber maxStrategyNumber,
+ StrategyNumber maxSupportNumber, AttrNumber maxAttributeNumber);
+
+
+#endif /* ISTRAT_H */
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * itup.h--
+ * POSTGRES index tuple definitions.
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ * $Id$
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef ITUP_H
+#define ITUP_H
+
+#include "c.h"
+#include "access/ibit.h"
+#include "access/tupdesc.h" /* for TupleDesc */
+#include "storage/itemptr.h"
+
+#define MaxIndexAttributeNumber 7
+