# Utente:MaEr/fix section structure.pl
#
# Da Wikipedia, l'enciclopedia libera. (From Wikipedia, the free encyclopedia.)
# Vai alla navigazione Vai alla ricerca (Jump to navigation / Jump to search)
use strict;
# use Node;

# Driver: slurp the whole page from standard input (one array element per
# line, line endings kept), repair the heading levels and write the result
# to standard output.
my @lines = <STDIN>;

@lines = fix_section_structure( \@lines );

print @lines;

# Repairs the heading hierarchy of a wiki page so that every heading has
# exactly one equal sign more than its parent heading (top-level headings
# end up with "==").
#
# usage:
# my @fixed_lines = fix_section_structure( \@lines );
#
# argument: a reference to an array of text lines
# returns:  a list that starts with the (empty) text of the internal root node
#           followed by the text of every line in the original order
#
# A line counts as a heading when it starts with "=" (see Node::new) and does
# not start inside an HTML comment.
sub fix_section_structure
	{
	# we expect a reference to an array:
	my @lines = @{ shift() };

	# we need a root node with 0 eq signs
	# because we cannot assume
	# that all second level nodes have "=="
	# (some might have "=")
	# we set the text field of that object to the empty string ('')
	# so we can print it:
	my $root_node = Node->new( '' );
	$root_node->number_of_eq( 0 );

	# here we collect the Node objects (root node first, then one per line):
	my @nodes = ( $root_node );
	foreach my $line ( @lines )
		{
		push( @nodes, Node->new( $line ) );
		}

	# are we in a comment (0 = no, 1 = yes):
	my $in_comment = 0;
	# the chain of headings that are still "open" at the current line
	# (the ancestors of whatever comes next); the root node sits at the
	# bottom and is never removed, so there is always a parent available:
	my @open_headings = ( $root_node );

	# we skip node number 0 because it is the root node
	# which is used as a parent node for any other node:
	foreach my $node ( @nodes[ 1 .. $#nodes ] )
		{
		# a heading starts in the first column, so whether it is commented
		# out depends on the comment state at the BEGINNING of the line:
		my $starts_in_comment = $in_comment;

		# update the comment state for the following lines:
		# the last comment delimiter on the line decides, because "<!--"
		# inside a comment and "-->" outside a comment change nothing:
		if ( $node->text() =~ m/.*(<!--|-->)/s )
			{
			$in_comment = ( $1 eq '<!--' ) ? 1 : 0;
			}

		if ( $starts_in_comment )
			{
			# don't count this node as a title but as an ordinary text line:
			$node->number_of_eq( undef() );
			}

		# undefined number -- this isn't a title:
		my $level = $node->number_of_eq();
		next if ( not defined $level );

		# the parent node is the nearest previous heading with a smaller
		# number of equal signs; every open heading with the same or a
		# larger number is closed by this heading, so we drop it:
		while ( @open_headings > 1
			and $open_headings[ -1 ]->number_of_eq() >= $level )
			{
			pop( @open_headings );
			}

		# tie the two nodes to each other:
		my $parent_node = $open_headings[ -1 ];
		$node->parent_node( $parent_node );
		$parent_node->add_child_node( $node );

		# this heading is now the innermost open heading:
		push( @open_headings, $node );
		}

	# now we set the root node to "="
	# because we need that node as a reliable base for the other nodes:
	$root_node->number_of_eq( 1 );
	recurse( $root_node );

	# convert nodes to text:
	my @new_lines;
	foreach my $node ( @nodes )
		{
		push ( @new_lines, $node->text() );
		# debug:
		# print $node->node_info();
		}

	return @new_lines;
	}

# we walk through the node tree and correct the number of equal signs
# starting from the root node:
#
# usage:
# recurse( $node );
#
# Every child of $node gets one equal sign more than $node itself, both in
# its text (leading and closing run of equal signs) and in its
# number_of_eq field; then the children are processed the same way.
# Nothing is returned.
sub recurse
	{
	my ( $node ) = @_;

	my $a_ref = $node->get_child_nodes();
	# no children -- nothing to do:
	return if ( not defined $a_ref );

	# correct level:
	# if a node has "=="
	# the child nodes must have "==="
	my $child_level = $node->number_of_eq() + 1;
	my $eqs = '=' x $child_level;

	foreach my $child_node ( @{ $a_ref } )
		{
		my $text = $child_node->text();

		# leading run of equal signs:
		$text =~ s/^=+/$eqs/;

		# closing run of equal signs:
		# first choice is the run that is followed by nothing but white
		# space and complete comments, so that an equal sign INSIDE a
		# trailing comment (<!-- a=b -->) is left alone;
		# otherwise we fall back to the last run of equal signs on the
		# line, because there might be other text after the last '=' of
		# the title (templates etc):
		my $result = ( $text =~ s/=+((?:\s*<!--.*?-->)*\s*)$/$eqs$1/ )
			|| ( $text =~ s/=+([^=]*)$/$eqs$1/ );
		# no line number here: $. would only tell the number of the last
		# line read from the input, not the line of this heading
		warn ( "WARNING: could not correct equal signs in end of line: $text$/" )
			if ( not $result );

		$child_node->text( $text );
		$child_node->number_of_eq( $child_level );
		recurse( $child_node );
		}

	return;
	}

{
# Node: one line of wiki text as a node of the heading tree.
#
# fields (all stored in the blessed hash):
# text          the text of the line
# number_of_eq  the number of leading equal signs if the line starts with "=",
#               otherwise not set (the line is no heading)
# parent_node   the parent Node (set by the caller)
# child_nodes   reference to the array of child Nodes (not set until the
#               first child is added)
package Node;

# usage:
# my $node = Node->new( "the content of the line..." );
sub new
	{
	my ( $class, $text ) = @_;
	my $self = { 'text' => $text };
	# a line that starts with equal signs is regarded as a heading:
	if ( defined $text and $text =~ m/^(=+)/ )
		{
		$self->{'number_of_eq'} = length( $1 );
		}
	# bless into the class new() was called on so that subclasses work;
	# fall back to this package if there is no usable invocant:
	bless ( $self, ref( $class ) || $class || __PACKAGE__ );
	return ( $self );
	}

# usage:
# setting:
# $node->text( "new text" );
# getting:
# my $text = $node->text();
# (an undefined argument is regarded as "getting")
sub text
	{
	my ( $self, $text ) = @_;
	if ( defined $text )
		{
		$self->{'text'} = $text;
		}
	return ( $self->{'text'} );
	}

# usage:
# setting:
# $node->number_of_eq( 2 );
# getting:
# my $number = $node->number_of_eq();
sub number_of_eq
	{
	my ( $self, $number ) = @_;
	# we count the number of arguments instead of testing on defined
	# because the caller must be able to set an undefined value
	# ("this line is no heading"):
	if ( @_ == 2 )
		{
		$self->{'number_of_eq'} = $number;
		}
	return ( $self->{'number_of_eq'} );
	}

# usage:
# setting:
# $node->parent_node( $some_other_node );
# getting:
# $parent_node = $node->parent_node();
sub parent_node
	{
	my ( $self, $parent_node ) = @_;
	if ( defined $parent_node )
		{
		$self->{'parent_node'} = $parent_node;
		}
	return $self->{'parent_node'};
	}

# usage:
# $node->add_child_node( $some_other_node );
# (an undefined argument is ignored)
sub add_child_node
	{
	my ( $self, $child_node ) = @_;
	if ( defined $child_node )
		{
		push ( @{ $self->{'child_nodes'} }, $child_node );
		}
	}

# usage:
# my $child_nodes_ref = $node->get_child_nodes();
# my @child_nodes = @{ $child_nodes_ref };
# (the reference is undefined as long as no child node has been added)
sub get_child_nodes
	{
	my ( $self ) = @_;
	return $self->{'child_nodes'};
	}

# usage:
# $node->remove_child_node( $child_node );
# (only the child list of $node is changed,
# the parent_node field of $child_node is left as it is)
sub remove_child_node
	{
	my ( $self, $node_to_be_removed ) = @_;
	if ( defined $node_to_be_removed )
		{
		# we keep all child nodes except $node_to_be_removed
		# and replace the existing child node list by the new one;
		# text comparison: the text representation of a node
		# is something like "Node=HASH(0x86f9850)"
		my @new_child_node_list =
			grep { $node_to_be_removed ne $_ } @{ $self->{'child_nodes'} || [] };
		$self->{'child_nodes'} = \@new_child_node_list;
		}
	}

# usage:
# print $node->node_info();
# returns one line of debug information about the node
# ("-" stands for "no number of equal signs" or "no parent node")
sub node_info
	{
	my ( $self ) = @_;

	my $node_text = $self->{'text'};
	chomp( $node_text );

	my $number = $self->{'number_of_eq'};
	if ( not defined $number )
		{
		$number = '-';
		}

	my $child_count = 0;
	if ( defined $self->{'child_nodes'} )
		{
		$child_count = scalar @{ $self->{'child_nodes'} };
		}

	my $parent_node_text = '-';
	if ( defined $self->{'parent_node'} )
		{
		$parent_node_text = $self->{'parent_node'}->{'text'};
		chomp( $parent_node_text );
		}

	return "eq. signs: $number; children: $child_count; text: $node_text; parent node: $parent_node_text$/";
	}

1;
}