#!/usr/bin/perl -w
# Copyright (c) Dave Horsfall.
# All rights reserved.
# 
# extended and rewritten by Andreas Schuldei
# for debian(-edu)
# 
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. Neither the name of the University nor the names of its contributors
#    may be used to endorse or promote products derived from this software
#    without specific prior written permission.
# 
# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
# 
# 
# @(#)$Id: rdnchk.e,v 1.12 2003-05-09 15:40:51+10 daveh Exp $
# $Log: rdnchk.e,v $
# Revision 1.12  2003-05-09 15:40:51+10  daveh
# Enforce single-value attributes for CI stuff.
#
# Revision 1.11  2003-05-08 16:46:47+10  daveh
# Add missing attributes when auto-fixing.
#
# Revision 1.10  2003-05-02 11:46:55+10  daveh
# Sort DNs before writing out, and minor mods.
#
# Revision 1.9  2003-04-24 10:39:19+10  daveh
# Always encode the userPassword attribute on output.
#
# Revision 1.8  2003-04-16 15:21:49+10  daveh
# Don't use non-core LDIF module; treat values as case-insensitive; binary.
#
# Revision 1.7  2003-04-02 11:16:20+10  daveh
# Delete ciComment (it's an attribute, not an objclass) and add ciLdapConfig
#
# Revision 1.6  2003-04-01 17:52:04+10  daveh
# Fix errors on request, and minor mods.
#
# Revision 1.5  2003-03-28 17:15:34+11  daveh
# More minor mods.
#
# Revision 1.4  2003-03-28 09:19:21+11  daveh
# Use reference to ARGV typeglob for input.
#
# Revision 1.3  2003-03-27 16:08:57+11  daveh
# Use Net::LDAP::LDIF, and minor speedups.
#
# Revision 1.2  2003-03-11 11:17:03+11  daveh
# Look for orphan DNs.
#
# Revision 1.1  2003-02-27 14:28:09+11  daveh
# Initial revision
#
#
# RDNCHK
#
# Given a slapcat input file, check for mismatched DN/RDN pairs etc.
# Optionally make fixes (use with care).
#
# The data structure is a hash of references to hashes of anonymous lists:
#
#   $entries{$dn} =	# $dn has been normalised
#   {
#     origDN => "original DN",
#     attr1 => [ "value1-a", "value1-b" ],
#     attr2 => [ "value2" ]
#   }
#
# which is accessed as (e.g):
#
#   @{entries{$dn}{"attr1"}}
#
# to return an array of the value(s) of $dn's attr1.
#
# Note that this structure is optimised for access to the DNs, *not*
# for searches.
#
# The DN is low-cased and leading/trailing/multiple spaces stripped
# (and the original stored for posterity).
#
# I assume that caseIgnoreMatch applies across the board, as otherwise
# it's too damned difficult.  This only fails, in practice, for encoded
# fields such as passwords, but I'm not looking at those (passwords are
# rarely, if ever, a candidate for being an RDN).  Remember: the specific
# purpose of this program is to perform a quick but reasonably thorough
# check for DN/RDN consistency, and it sorta grew from there.
#
# We can't use Perl Net::LDAP::LDIF, because it's not a core module
# (too hard to maintain our remote branches when upgrading).
#
# TODO:
#	Check custom stuff:
#
#	    ciDefPrinter is single-value per ciPrinterClass.
#	    Fundamentally difficult, because these are keys
#	    into printcap, not LDAP.
#

use Data::Dumper;
use Getopt::Long;
use MIME::Base64;
use strict;
use diagnostics;

my $origDN = '.origDN';	# Attribute stores original DN

# the command line options

my ($opt_dump, $opt_fix, $opt_inheritance, $opt_suffix, $opt_write,
    $opt_no_auth, $opt_org);

# some big hashes
my (%entries, %schema, @single);

my $suffix;


&parse_options;
$opt_write = 1 if $opt_fix;

#
# Process each entry.
# A list (returned in @_) holds each line, with the DN first.
#
while (@_ = &GetEntry)	# Loop per entry (exit on EOF)
{
    my $dn = shift @_;
    # Check if base64 encoded
    next if ! $dn =~ /^dn::? /i;
    my $encoded;
    if($dn =~ /^dn:: /i) {
      $dn =~ s/dn:: (.*)/$1/;
      $dn = decode_base64($dn);
      $dn =~ s/\s$//;
      $encoded = 1;
    } else {
      $dn =~ s/dn: (.*)/$1/;
      $encoded = 0;
    }
    my $cdn = &canon($dn);
    $entries{$cdn}{$origDN} = $dn;
    $entries{$cdn}{"encoded"} = $encoded;

    #
    # Infer the suffix.
    # Assume it's the shortest DN.
    #
    if (!$opt_suffix)
    {
	$suffix = $cdn
	    if (!defined $suffix) || (length $cdn < length $suffix);
    }

    #
    # Extract the first component (the RDN)
    # for later tests.
    #
    my ( $rdn, undef ) = split ( /,/, $cdn );
    my ( $rdnattr, $rdnval ) = split ( /=/, $rdn );

    #
    # Get the attributes/values.
    # Attributes are low-cased.
    #
    for (@_) {
        my ( $attr, $val ) = split ( /\s/, $_, 2 );    # In case of "::"
	$attr =~ s/://;
	if ($attr =~ /:/)			# Must be binary (base-64)
	{
	    $attr =~ s/://;
	    $val = &demime($val);
	}
	push @{$entries{$cdn}{lc $attr}}, $val;
    }

    #
    # Does the RDN exist?
    #
    if (!defined @{$entries{$cdn}{$rdnattr}})
    {
	print STDERR "dn: $dn\nMissing RDN";
	if ($opt_fix)
	{
	    push @{$entries{$cdn}{$rdnattr}}, $rdnval;
	    print STDERR "; inserted \"$rdnattr=$rdnval\"";
	}
	print STDERR "\n\n";
    }

    #
    # And how many?  Multiples are permitted
    # in some contexts, but not in ours.
    #
    my $attrs = $entries{$cdn}{$rdnattr};	# Actually a reference
    my $nrdn = @{$attrs};
    if ($nrdn > 1)
    {
	print STDERR "dn: $dn\nMultiple RDNs: \"@{$attrs}[0]\"";
	for (my $i = 1; $i < $nrdn; $i++)
	{
	    print STDERR ", \"@{$attrs}[$i]\"";
	}
	if ($opt_fix)
	{
	    print STDERR "; using \"$rdnval\"";
	    $entries{$cdn}{$rdnattr} = [ $rdnval ];
	}
	print STDERR "\n\n";
    }

    #
    # Do they match?
    #
    if (defined @{$attrs} && $rdnval ne &canon(@{$attrs}[0]))
    {
	print STDERR "dn: $dn\nMismatched RDN: \"$rdnattr=@{$attrs}[0]\"";
	if ($opt_fix)
	{
	    print STDERR "; using \"$rdnval\"";
	    $entries{$cdn}{$rdnattr} = [ $rdnval ];
	}
	print STDERR "\n\n";
    }

    #
    # Check single-value attributes.
    #
    foreach my $attr (@single)
    {
	my $nval = 0;
	my $attrs = $entries{$cdn}{lc $attr};
	$nval = @{$attrs} if defined @{$attrs};
	if ($nval > 1)
	{
	    print STDERR "dn: $dn\nMultiple attrs for \"$attr\": \"@{$attrs}[0]\"";
	    for (my $i = 1; $i < $nval; $i++)
	    {
		print STDERR ", \"@{$attrs}[$i]\"";
	    }
	    if ($opt_fix)
	    {
		print STDERR "; using \"@{$attrs}[0]\"";
		$entries{$cdn}{lc $attr} = [ @{$attrs}[0] ];
	    }
	    print STDERR "\n\n";
	}
    }

    #
    # Check the objectclass inheritance and hirarcy.
    #
    if ($opt_inheritance)	# Will soon be mandatory
    {
	my $obj_ref = objlist2hash( $entries{$cdn}{'objectclass'} );
	$obj_ref->{top} = 1 # it might be that top is not there yet. 
	    unless $obj_ref->{alias};

	# remove unknown object classes
	foreach my $i ( keys %$obj_ref ) {
	    next if $i eq "top";	# top is topless :-)
            unless ( $schema{objectclass}{$i} ) {
		# check if objectclass is known in the first place
		print STDERR "dn: $dn\nUnknown objectclass: \"$i\"";
		if ($opt_fix)
		{
		    print STDERR "; ignored";
                    delete $obj_ref->{$i};
		}
		print STDERR "\n\n";
	    }
	}
	
	#
        # check if we have one and only one structural 
	# object class and remove superfluous object classes.
	#
	my %structural_objectclasses;
	foreach my $i ( keys %$obj_ref ) {	
	    next if $i eq "top";    # top is topless :-)
	    
	    if ( $schema{objectclass}{$i}{structural} ) {
		$structural_objectclasses{$i}=1;
	    }
	}
	if  (1 < scalar keys %structural_objectclasses ) {
	    print STDERR "dn: $dn\nMore then one structural objectclass:";
	    for my $structural_objectclass ( keys %structural_objectclasses ) {
		print STDERR " \"$structural_objectclass\"";
	    }
	    print STDERR ".";
	    my $removable_objectclasses_ref = 
		resolve_structural_clash ( \%structural_objectclasses, $entries{$cdn} );
	    if ($opt_fix) {
		print STDERR " Removing ";
		for my $obj_class ( @$removable_objectclasses_ref ) {
		    print STDERR " \"$obj_class\"";
		    delete $obj_ref->{$obj_class};
		}
		print STDERR ".\n\n";
	    }
	}
	#
	# Now we find and add missing superior objectclasses
	#
	foreach my $i ( keys %$obj_ref ) {	
	    next if $i eq "top";    # top is topless :-)
	    for my $sup ( @{ $schema{objectclass}{$i}{sup} } ) {
		unless ( $obj_ref->{$sup} ) {
		    print STDERR "dn: $dn\nNo sup for \"$i\"";
		    if ($opt_fix) {
		    print STDERR "; inserted";
			$obj_ref->{$sup} = 1;
		}
		print STDERR "\n\n";
	    }
	    }    
	} 

    #
	# see if all mandatory attributes are there
    #
	my %must;
	for my $i ( keys %$obj_ref ) {	
	    next if $i eq "top";    # top is topless :-)
	    
	    for my $attrib_must ( @{ $schema{objectclass}{$i}{must} } ) {
		$must{ $attrib_must } = 1;
	    }
	}
	my %must_missing;
      MUST:
	for my $i ( keys %must ) {
	    next if ($i eq "cn"            or # there is no schema entry for cn!
		     $i eq "objectclass");    # or for objectclass
	    for my $name ( @{ $schema{attributetype}{$i}{names} } ) {
		next MUST if $entries{$cdn}{$name};
	    }
	    $must_missing{$i} = 1;
	}
	for my $i ( keys %must_missing ) {
	    print STDERR "dn: $dn\nAttribut \"$i\": mandatory but missing";
	    if ($opt_fix) {
		print STDERR "; inserted";
		$entries{$cdn}{$i} = [ "" ]; # FIXME: figure out proper syntax
	    }
	    print STDERR "\n\n";
	}

	# see if any attributes are orphans
	# everything is allowed with extensibleobject. skip this case
	unless ( $obj_ref->{extensibleobject} ) { 

	    my %attrib_all = %must;
	    foreach my $i ( keys %$obj_ref ) {
		
		next if $i eq "top";    # top is topless :-)
		
		for my $attrib_may ( @{ $schema{objectclass}{$i}{may} } ) {
		    $attrib_all{ $attrib_may } = 1;
		}
	    }
	    my %attrib_orphan;
	    for my $attrib ( keys %{ $entries{$cdn} } ) {
		next if ".origDN"     eq $attrib or
		    "encoded"         eq $attrib or
		    "objectclass"     eq $attrib or
		    "creatorsname"    eq $attrib or
		    "createtimestamp" eq $attrib or
		    "modifiersname"   eq $attrib or
		    "modifytimestamp" eq $attrib or
		    "cdn"             eq $attrib or
		    "dn"              eq $attrib;
		unless ( $attrib_all{ $attrib } ) {
		    $attrib_orphan{ $attrib } = 1;
		}
	    }
	    for my $i ( keys %attrib_orphan ) {
		print STDERR "dn: $dn\nAttribut \"$i\": is not part of objectclasses";
		for my $objclss ( keys %$obj_ref) {
		    print STDERR  " \"$objclss\""; 
		}
		if ($opt_fix) {
		    print STDERR "; removed";
		    delete $entries{$cdn}{$i}; 
    }
		print STDERR "\n\n";
	    }
	} # extensibleobject.
	# at this point we should have an entry with all needed sups, nicely cleand up 
	objhash2list( $entries{$cdn}{objectclass} , $obj_ref ); # back to the old format
    } # inheritance

    
} # main loop

#
# Make sure each entry has a parent.
# For now, we kill orphans on sight...
#
$suffix = $opt_suffix if $opt_suffix;
foreach my $thisdn (keys %entries)
{
    my $i = $thisdn;
    $i =~ s/[^,]*,//;
    if (!$entries{$i} && $thisdn ne &canon($suffix))
    {
	print STDERR "dn: $thisdn\nOrphan";
	if ($opt_fix)
	{
	    print STDERR "; deleted";
	    delete $entries{$thisdn};
	}
	print STDERR "\n\n";
    }

    # Fix up the suffix dn if it's our mess, adding a structural objectclass.
    if ($thisdn eq &canon($suffix)) {
	my $obj_ref = objlist2hash( $entries{$thisdn}{'objectclass'} );
	if ( ( 1 == keys %$obj_ref  
	       and $obj_ref->{dcobject} )
	     or 
	     ( 2 == keys %$obj_ref  
	       and $obj_ref->{dcobject} 
	       and $obj_ref->{top} )
	     )
		{
            if ( defined($opt_org) ) {
			push(@{$entries{$thisdn}{'objectclass'}}, 'organization');
			push(@{$entries{$thisdn}{'o'}}, $opt_org);
		} else {
			push(@{$entries{$thisdn}{'objectclass'}}, 'domain');
		}
	}
	# check for $classes == dcObject.
    }
}

print STDERR Dumper(%entries) if $opt_dump;

#
# Write out (possibly fixed) file if requested.
#
# The DN keys are sorted by length, which ensures that
# parents come before children.
#
if ($opt_write)
{
    foreach my $dn (sort { length($a) <=> length($b) } keys %entries)
    {
	&write_out($dn)
    }
}

exit 0;

###########################################################################

#
# Canonicalise a string.
# Delete leading/trailing blanks around commas, and lowcase.
#
sub canon
{
    ($_) = @_;
    s/\s+/ /g;	# Catch tabs as well
    s/ ,/,/g;
    s/, /,/g;
    lc;
}

#
# Write an entry to standard output.
#
# Ought to wrap at 78 cols as well.
#
sub write_out
{
    my ($dn) = @_;
    my $odn = $entries{$dn}{$origDN};
    if ($entries{$dn}{"encoded"} == 1) {
      my $encoded = encode_base64( $odn, "" );
      print "dn:: $encoded\n";
    } else {
      print "dn: $odn\n";
    }
    foreach my $attr ( keys %{ $entries{$dn} } ) {
        next if $attr eq $origDN or $attr eq "encoded" ;
        foreach my $value ( @{ $entries{$dn}{$attr} } ) {
	    print "$attr:";

            if ( defined $value and ( $attr =~ /userpassword/i
                || $value =~ /(^[ :]|[\x00-\x1f\x7f-\xff])/ )  )
	    {
		print ": ", &enmime($value, "");
	    }
            elsif ( defined $value and "" ne $value) { 
		print " $value";
	    }
	    print "\n";

        }
    }
    print "\n";
}

sub INIT {

    my $schema_ref = parse_schemas();
    %schema = %$schema_ref;

    #
    # Single-value attributes.
    #
    @single =
    (
	"ciAppType",
	"ciDBPath",
	"ciDomainName", 
	"ciLdapEnabled",
	"ciLdapServer",
	"ciOSType",
	"ciPortNum",
	"ciPrinterClass",
	"ciRegion",
	"ciStatus",
    );

    #
    # Random stuff.
    #
    $/ = "";		# Read input in paragraph mode
}

#
# Process options.
#
sub parse_options
{
    $SIG{'__WARN__'} = sub { die $_[0] };	# Exit on bad options

    Getopt::Long::Configure("bundling");	# Old-style (-xyz, --word)
    GetOptions
    (
	"--dump"	=> \$opt_dump,		# Dump data structure
	"-D"		=> \$opt_dump,

	"--fix"		=> \$opt_fix,		# Fix errors if possible
	"-f"		=> \$opt_fix,		# (also implies "write")

	"--inheritance"	=> \$opt_inheritance,	# Check obj inheritance
	"-i"		=> \$opt_inheritance,	# (too many false alarms)

	"--suffix=s"	=> \$opt_suffix,	# Specify directory suffix
	"-s=s"		=> \$opt_suffix,

	 "--write"	=> \$opt_write,		# Write ordered file
	 "-w"		=> \$opt_write,

	 "--org=s"	=> \$opt_org,		# Organization to use for
	 "-o=s"		=> \$opt_org,		# fixing up the suffix
    )
}

#
# Get a complete entry as a list of lines.
# We use the trick of setting the input delimiter
# to "", to read a paragraph at a time, so we can
# join continued lines.
#
sub GetEntry
{
    my @a;
    do
    {
	$_ = (<>);
	return () if !defined;	# EOF
	s/$/\n/;	# In case we strip last newline below
	s/#.*\n//g;	# Comments
	chomp;		# Always strips >= 2 newlines
	s/\n //g;	# Join lines
	@a = split /\n/;
    }
    while (@a < 2);	# Skips phantom entries (caused by comments)
    return @a;
}

#
# Given a string, return a de-mimed version.
# Can't use MIME::Base64 because it's not a core module.
# Instead, I pinched the code from it...
#
sub demime
{
    local($^W) = 0; # unpack("u",...) gives bogus warning in 5.00[123]

    my $str = shift;
    $str =~ tr|A-Za-z0-9+=/||cd;            # remove non-base64 chars
    if (length($str) % 4) {
	require Carp;
	Carp::carp("Length of base64 data not a multiple of 4")
    }
    $str =~ s/=+$//;                        # remove padding
    $str =~ tr|A-Za-z0-9+/| -_|;            # convert to uuencoded format

    return join'', map( unpack("u", chr(32 + length($_)*3/4) . $_),
	                $str =~ /(.{1,60})/gs);
}

#
# En-mime same.
# I didn't write this bletcherous code either.
#
sub enmime
{
    my $res = "";
    my $eol = $_[1];
    $eol = "\n" unless defined $eol;
    pos($_[0]) = 0;                          # ensure start at the beginning

    $res = join '', map( pack('u',$_)=~ /^.(\S*)/, ($_[0]=~/(.{1,45})/gs));

    $res =~ tr|` -_|AA-Za-z0-9+/|;               # `# help emacs
    # fix padding at the end
    my $padding = (3 - length($_[0]) % 3) % 3;
    $res =~ s/.{$padding}$/'=' x $padding/e if $padding;
    # break encoded string into lines of no more than 76 characters each
    if (length $eol) {
	$res =~ s/(.{1,76})/$1$eol/g;
    }
    return $res;
}

sub read_config {
    my ($file) = @_;
    
    open CONFIG,   "<  $file"  or die "can't open $file: $!";

    my %config;
    while ( <CONFIG> ) {
	chomp;
	s/\#.*//;
	s/^\s+//;
	s/\s+$//;
	next unless length;
	my ($var, $value) = split(/\s*=\s*/, $_, 2);
	$config{$var} = $value;
    }
    
    close CONFIG;
    
    return \%config;
}

sub read_slapd_config {
    my ($file) = @_;
    
    open CONFIG,   "<  $file"  or die "can't open $file: $!";

    my $seperator = $/; # save the seperator since it is non-standard
    undef $/;
    my $whole_file =  <CONFIG>; # sluuuurp
    $whole_file =~ s/\n(?!\n)\s+/ /g; # merge logical line as the ldap config parser does
    $/= $seperator; # restore the original line seperator;

    my @whole_file = split (/\n/, $whole_file);


    my %config;
    while ( @whole_file ) {
	$_ = pop @whole_file;
	chomp;
	s/\#.*//;
	s/^\s+//;
	s/\s+$//;
	next unless length;
	my ($var, $value) = split(/\s+/, $_, 2);
	push @{ $config{$var} }, $value; 

	# this does not keep the order of the configuration easily
	# accessable, which is bad for things like databases and
	# suffixes but does not matter for the include lines of the
	# schemas. With some efford one could pirce together the order
	# by getting it from the order in the anonymous arrays.

    }
    
    close CONFIG;
    
    return \%config;
}

sub extract_system_schemas {
    my ($core_schema, $schema_raw_ref) = @_;

    while ( $core_schema ) {
	$core_schema =~ s/^.*?\n\# system schema\n\#(.*?\))(\n\n.*)$/$2/s;
	last unless $1;
	my $core_def = $1;
	$core_def =~ s/\n(?!\n)\#\s+/ /g;
	chomp $core_def;
	$core_def =~s/^\s+//;
	$core_def =~s/\s+$//;
	next unless length $core_def;
	push @$schema_raw_ref, "$core_def\n"; 
    }
}

sub read_schema {
    my ($file) = @_;
    
    open SCHEMA,   "<  $file"  or die "can't open $file: $!";

    my $seperator = $/; # save the seperator since it is non-standard
    undef $/;
    my $whole_file =  <SCHEMA>; # sluuuurp

    my @schema_raw;    
    if ($file eq "/etc/ldap/schema/core.schema") {
	# There are some system schema entries which are hard coded
	# into openLdap.  They are marked "system schema" in the
	# core.schema. we try to detect them and remove the comments
	# in front of those.
	extract_system_schemas( $whole_file, \@schema_raw);
	
    }

    $whole_file =~ s/\n(?!\n)\s+/ /g; 
    # this is dubious, since we should watch not lines starting with 
    # whitespaces but balance the parantecies. but this works well.
    $/= $seperator; # restore the original line seperator;

    
    my @whole_file = split (/\n/, $whole_file);

    
    while ( @whole_file ) {
	$_ = pop @whole_file;
	chomp;
	s/\#.*//;
	s/^\s+//;
	s/\s+$//;
	next unless length;
	push @schema_raw, "$_\n"; 
    }
    
    close SCHEMA;
    return \@schema_raw;
}


sub find_slapd_config {

    my $defaults = "/etc/default/slapd";
    my $slapd_defaults_ref;
    
    if ( -f $defaults ) {
	$slapd_defaults_ref = read_config( $defaults );	
    }

    unless ( $slapd_defaults_ref->{SLAPD_CONF} and 
	 -f $slapd_defaults_ref->{SLAPD_CONF} ) 
    {
	$slapd_defaults_ref->{SLAPD_CONF} = "/etc/ldap/slapd.conf";
    }
	
    return $slapd_defaults_ref->{SLAPD_CONF};

}

sub parse_slapd_config {
    
    my ($slapd_config_file) = @_;
    
    my $slapd_config_href = read_slapd_config( $slapd_config_file );
    
    return $slapd_config_href;
}

sub find_active_schemas {
    my ($slapd_config_href) = @_;

    return \@{ $slapd_config_href->{include} }; 
}
sub preprocess_schemas {
    my ( $schemas_list_ref ) = @_;
    
    my @schemas_raw;
    
    for my $file ( @{$schemas_list_ref} ) {
	push @schemas_raw, @{ read_schema( $file ) }; 
    }
    return \@schemas_raw;
}

sub get_used_schemas {

    my $slapd_config_path = find_slapd_config();
    my $slapd_config_href = parse_slapd_config( $slapd_config_path );
    my $schemas_list_aref = find_active_schemas( $slapd_config_href );
    my $schemas_raw_aref = preprocess_schemas( $schemas_list_aref );
    return $schemas_raw_aref;
}

sub parse_schemas {

    my $schemas_raw_aref = get_used_schemas();
    
    my %schema;
    while ( @$schemas_raw_aref ) {
	$_= pop @$schemas_raw_aref;
	chomp;

	#poor man`s parser

	my ( $type ) =
	    /^(\w+)\s/;
	
	my ( $structural ) = 
	    /^.*\s(STRUCTURAL)\s.*$/;
	
	my ( $auxiliary ) = 	
	    /^.*\s(AUXILIARY)\s.*$/;
	
	my ( $description ) = 
	    /^.*\s+DESC\s+\'([^\']+)\'.*$/;

	my ( $syntax ) = 	
	    /^.*\s+SYNTAX\s+([\d\.\{\}]+).*$/;


	my @names;
 	if ( /^.*\s+NAME\s+\(\s*\'([\w\s\']+)\'\s*\).*$/  ) {
	     @names = split(/\'\s+\'/, lc $1);
	}
	elsif ( /^.*\s+NAME\s+\'(\w+)\'\s.*$/ ) {
	    push @names, lc $1;
	}
	
	my @sup;
	if ( /^.*\s+SUP\s+\(\s*([^\)]+?)\s*\).*$/ ) {
	    @sup = split(/\s*\$\s*/, lc $1);
	}
	elsif ( /^.*\s+SUP\s+(\w+)\s.*$/ ) {
	    push @sup, lc $1;
	}
	
	my @must;
	if ( /^.*\s+MUST\s+\(\s*([^\)]+?)\s*\).*$/ ) {
	    @must = split(/\s*\$\s*/, lc $1);
	}
	elsif ( /^.*\s+MUST\s+(\w+)\s.*$/ ) {
	    push @must, lc $1 ;
	}
	
	my @may;
	if ( /^.*\s+MAY\s+\(\s*([^\)]+?)\s*\).*$/ ) {
	    @may = split(/\s*\$\s*/, lc $1);
	}
	elsif ( /^.*\s+MAY\s+(\w+)\s.*$/ ) {
	    push @may, lc $1 ;
	}

	unless ($type eq "attributetype" or 
		$type eq "objectclass"   or 
		@names ) 
	{
	    print STDERR "$_\n"; 
	}
	else {
	    for my $name ( @names ) {
		$name = lc $name;

		$schema{$type}{$name}{names}       = \@names;
		$schema{$type}{$name}{description}= $description 
		    if $description;
		$schema{$type}{$name}{syntax}     = $syntax
		    if $syntax;
		$schema{$type}{$name}{structural} = 1
		    if $structural;
		$schema{$type}{$name}{auxiliary}  = 1
		    if $auxiliary;
		$schema{$type}{$name}{must}       = \@must 
		    if @must;
		$schema{$type}{$name}{may}        = \@may
		    if @may;
		$schema{$type}{$name}{sup}        = \@sup 
		    if @sup;
	    }
	}
    }
    return \%schema;
}

sub objlist2hash {
    my ($list_ref) = @_;
    
    my %objectclass;
    for my $objclass ( @$list_ref ) {
	$objclass = lc $objclass;
	$objectclass{$objclass } = 1;
    }
    
    return \%objectclass;
}

sub objhash2list {
    my ($array_ref , $obj_hash_ref ) = @_;
    
    @$array_ref = keys %$obj_hash_ref;
}

sub resolve_structural_clash {
    my ( $structural_objectclasses_ref, $entry_ref ) = @_;

    my @removable_objectclasses;

    # remove automountmap
    # i dont know good heuristics to decide which one i 
    # should remove, so this is hard coded.
    # what other common cases are there?
    if ($structural_objectclasses_ref->{automountmap}       and
	$structural_objectclasses_ref->{organizationalunit} and 
	2 == keys %$structural_objectclasses_ref ) {
	push @removable_objectclasses, "automountmap";
    }
    return \@removable_objectclasses;
}
